DatetimeArray#

class pandas::DatetimeArray#

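Extension array for datetime data. The stored values are exposed through data() as a numpy::NDArray<int64_t>, together with a boolean missing-value mask (mask()), a time unit (unit()), and an optional timezone (tz()). Missing entries are represented as NaT.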

Example#

#include <pandas/pandas.h>
using namespace pandas;

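// Build an array from ISO date strings; "NaT" marks a missing value
DatetimeArray arr(std::vector<std::string>{"2023-06-15", "NaT", "2023-01-01"});
// tz_localize returns a new, timezone-aware array
auto localized = arr.tz_localize("UTC");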

Constructors#

Signature

Location

Example

DatetimeArray(const numpy::NDArray<numpy::datetime64>& data, const numpy::NDArray<numpy::bool_>& mask, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr, bool copy = false)

pd_datetime_array.h:97

DatetimeArray(const numpy::NDArray<int64_t>& data, const numpy::NDArray<numpy::bool_>& mask, numpy::DateTimeUnit unit, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr, bool copy = false)

pd_datetime_array.h:123

explicit DatetimeArray(const numpy::NDArray<numpy::datetime64>& data, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr)

pd_datetime_array.h:141

explicit DatetimeArray(const std::vector<std::optional<numpy::datetime64>>& values, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr)

pd_datetime_array.h:189

explicit DatetimeArray(const std::vector<std::string>& iso_strings, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr)

pd_datetime_array.h:210

Construction#

Signature

Return Type

Location

Example

static DatetimeArray from_sequence(const std::vector<std::optional<numpy::datetime64>>& scalars, std::shared_ptr<numpy::TimezoneInfo> tz = nullptr)

DatetimeArray

pd_datetime_array.h:486

static DatetimeArray from_timestamps(const std::vector<std::optional<Ts>>& vec, const std::string& uniform_tz = "")

DatetimeArray

pd_datetime_array.h:256

View

Indexing / Selection#

Signature

Return Type

Location

Example

numpy::datetime64 at(size_t index) const

numpy::datetime64

pd_datetime_array.h:393

View

const numpy::NDArray<numpy::bool_>& mask() const

const numpy::NDArray<numpy::bool_>&

pd_datetime_array.h:373

View

DatetimeArray take(const std::vector<size_t>& indices, bool allow_fill = false, std::optional<numpy::datetime64> fill_value = std::nullopt) const

DatetimeArray

pd_datetime_array.h:440

View

Data Manipulation#

Signature

Return Type

Location

Example

DatetimeArray dropna() const

DatetimeArray

pd_datetime_array.h:538

View

Missing Data#

Signature

Return Type

Location

Example

DatetimeArray fillna(const numpy::datetime64& value) const

DatetimeArray

pd_datetime_array.h:523

View

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_datetime_array.h:415

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_datetime_array.h:422

View

Statistics#

Signature

Return Type

Location

Example

size_t count() const

size_t

pd_datetime_array.h:586

View

std::optional<numpy::datetime64> max() const

std::optional<numpy::datetime64>

pd_datetime_array.h:893

View

std::optional<numpy::datetime64> min() const

std::optional<numpy::datetime64>

pd_datetime_array.h:884

View

IntegerArray<numpy::int32> minute() const

IntegerArray<numpy::int32>

pd_datetime_array.h:982

View

Comparison#

Signature

Return Type

Location

Example

size_t len() const

size_t

pd_datetime_array.h:330

View

Sorting#

Signature

Return Type

Location

Example

numpy::NDArray<size_t> argsort(bool ascending = true, const std::string& na_position = "last") const

numpy::NDArray<size_t>

pd_datetime_array.h:810

View

Combining#

Signature

Return Type

Location

Example

static DatetimeArray concat(const std::vector<DatetimeArray>& arrays)

DatetimeArray

pd_datetime_array.h:494

View

Time Series#

Signature

Return Type

Location

Example

DatetimeArray tz_convert(std::shared_ptr<numpy::TimezoneInfo> tz) const

DatetimeArray

pd_datetime_array.h:1449

View

DatetimeArray tz_convert(const std::string& tz_name, const std::string& tz_display = "") const

DatetimeArray

pd_datetime_array.h:1463

View

DatetimeArray tz_localize(std::shared_ptr<numpy::TimezoneInfo> tz, const std::string& ambiguous = "raise", const std::string& nonexistent = "raise") const

DatetimeArray

pd_datetime_array.h:1299

View

DatetimeArray tz_localize(const std::string& tz_name, const std::string& ambiguous = "raise", const std::string& nonexistent = "raise", const std::string& tz_display = "") const

DatetimeArray

pd_datetime_array.h:1421

View

I/O#

Signature

Return Type

Location

Example

std::string to_string() const

std::string

pd_datetime_array.h:1485

View

Conversion#

Signature

Return Type

Location

Example

DatetimeArray copy() const

DatetimeArray

pd_datetime_array.h:433

View

Set Operations#

Signature

Return Type

Location

Example

DatetimeArray unique() const

DatetimeArray

pd_datetime_array.h:748

View

Type Checking#

Signature

Return Type

Location

Example

BooleanArray is_leap_year() const

BooleanArray

pd_datetime_array.h:1208

View

BooleanArray is_month_end() const

BooleanArray

pd_datetime_array.h:1103

View

BooleanArray is_month_start() const

BooleanArray

pd_datetime_array.h:1084

View

bool is_na(size_t index) const

bool

pd_datetime_array.h:404

View

BooleanArray is_quarter_end() const

BooleanArray

pd_datetime_array.h:1146

View

BooleanArray is_quarter_start() const

BooleanArray

pd_datetime_array.h:1125

View

bool is_tz_aware() const

bool

pd_datetime_array.h:351

View

BooleanArray is_year_end() const

BooleanArray

pd_datetime_array.h:1189

View

BooleanArray is_year_start() const

BooleanArray

pd_datetime_array.h:1170

View

Other Methods#

Signature

Return Type

Location

Example

std::optional<size_t> argmax() const

std::optional<size_t>

pd_datetime_array.h:862

View

std::optional<size_t> argmin() const

std::optional<size_t>

pd_datetime_array.h:842

View

const numpy::NDArray<int64_t>& data() const

const numpy::NDArray<int64_t>&

pd_datetime_array.h:366

View

IntegerArray<numpy::int32> day() const

IntegerArray<numpy::int32>

pd_datetime_array.h:944

View

IntegerArray<numpy::int32> dayofweek() const

IntegerArray<numpy::int32>

pd_datetime_array.h:1020

View

IntegerArray<numpy::int32> dayofyear() const

IntegerArray<numpy::int32>

pd_datetime_array.h:1041

View

void detect_unit()

void

pd_datetime_array.h:1535

numpy::datetime64 dt(iso_strings[i])

numpy::datetime64

pd_datetime_array.h:224

View

dtype_type dtype() const

dtype_type

pd_datetime_array.h:288

View

bool empty() const

bool

pd_datetime_array.h:323

View

std::pair<numpy::NDArray<numpy::int64>, DatetimeArray> factorize() const

std::pair<numpy::NDArray<numpy::int64>, DatetimeArray>

pd_datetime_array.h:774

View

bool has_na() const

bool

pd_datetime_array.h:599

View

IntegerArray<numpy::int32> hour() const

IntegerArray<numpy::int32>

pd_datetime_array.h:963

View

IntegerArray<numpy::int32> month() const

IntegerArray<numpy::int32>

pd_datetime_array.h:925

View

size_t nbytes() const

size_t

pd_datetime_array.h:302

View

constexpr int ndim() const

int

pd_datetime_array.h:309

View

DatetimeArray normalize() const

DatetimeArray

pd_datetime_array.h:1272

View

IntegerArray<numpy::int32> quarter() const

IntegerArray<numpy::int32>

pd_datetime_array.h:1060

View

std::string repr() const

std::string

pd_datetime_array.h:1502

View

IntegerArray<numpy::int32> second() const

IntegerArray<numpy::int32>

pd_datetime_array.h:1001

View

std::vector<size_t> shape() const

std::vector<size_t>

pd_datetime_array.h:316

View

size_t size() const

size_t

pd_datetime_array.h:295

View

store_ns(result_data, i, wall_ns)

pd_datetime_array.h:1326

store_ns(result_data, i, utc_ns)

pd_datetime_array.h:1382

store_ns(result_data, i, utc_ns)

pd_datetime_array.h:1387

store_ns(result_data, i, utc_ns)

pd_datetime_array.h:1403

store_ns(result_data, i, valid_utc_times[0])

pd_datetime_array.h:1407

store_ns(result_data, i, utc_ns)

pd_datetime_array.h:1412

std::shared_ptr<numpy::TimezoneInfo> tz() const

std::shared_ptr<numpy::TimezoneInfo>

pd_datetime_array.h:344

View

numpy::DateTimeUnit unit() const

numpy::DateTimeUnit

pd_datetime_array.h:337

View

void validate_arrays()

void

pd_datetime_array.h:1515

IntegerArray<numpy::int32> year() const

IntegerArray<numpy::int32>

pd_datetime_array.h:906

View

Internal Methods#

2 internal methods (prefixed with underscore)

Code Examples#

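The following examples are extracted from the test suite. Each snippet is labelled with a method name and the test-file location it was taken from. The snippets appear to be matched by method name only, so some of them exercise a same-named method on a different class (for example CategoricalArray, CategoricalIndex, PeriodIndex, or DataFrame) rather than DatetimeArray itself.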

from_timestamps (pd_test_extension_array.cpp:9)
 1// pd_test_extension_array.cpp — L3 step L3.15 + Fix A storage-flip extensions
 2//
 3// Storage invariants and round-trip integrity for pandas::DatetimeArray and
 4// pandas::TimedeltaArray after the Fix A storage flip
 5// (do/plan_L3_fix_a_storage_flip.md, applied 2026-05-04).
 6//
 7// Verifies:
 8//   - sizeof(int64_t) == 8 (storage element size invariant)
 9//   - DatetimeArray::from_timestamps() round-trips a vector<optional<Timestamp>>
10//   - NaT slots are preserved through the round-trip (mask stays consistent)
11//   - tz-aware uniform-tz construction produces a non-null tz_ field
12//   - Boxing reconstruction via getElementAt() recovers the input ns values
13//   - Fix A storage shape: data() returns NDArray<int64_t> (8 B/elem, ns count)
14//   - NaT sentinel: INT64_MIN round-trips through the boxing layer
15//   - All 4 linear units (s/ms/us/ns) round-trip through ns-canonical storage
16//   - Tz-aware boxing reconstruction (UTC, US/Eastern, +05:30) preserves tz
17
18#include "../pandas/pd_datetime_array.h"
19#include "../pandas/pd_timedelta_array.h"
at (pd_test_1_all.cpp:6581)
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
6576                pandas::DataFrame df_na(float_data);
6577
6578                auto na_mask = df_na.isna();
6579                // Row 1, col 0 (X) should be NA
6580                if (!na_mask.getElementAt({1, 0})) {
6581                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583                }
6584                // Row 2, col 1 (Y) should be NA
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110    std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112    // Create DatetimeArray with some values
9113    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9115    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9116    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9117
9118    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119    mask.setElementAt({0}, numpy::bool_(false));
9120    mask.setElementAt({1}, numpy::bool_(false));
9121    mask.setElementAt({2}, numpy::bool_(false));
9122
9123    pandas::DatetimeArray arr(data, mask);
9124    pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126    bool passed = (idx.size() == 3 && !idx.empty() &&
9127                   idx.name().has_value() && *idx.name() == "timestamps" &&
9128                   idx.inferred_type() == "datetime");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897    std::cout << "========= inherited take ==============================";
5898
5899    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900    pandas::CategoricalIndex idx(arr);
5901
5902    std::vector<size_t> indices = {0, 2, 3};
5903    pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905    bool passed = (taken.size() == 3);
5906    if (!passed) {
5907        std::cout << "  [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908        throw std::runtime_error("pd_test_categorical_index_take failed");
5909    }
5910
5911    std::cout << " -> tests passed" << std::endl;
5912}
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
minute (pd_test_1_all.cpp:7505)
7495    std::cout << "========= minute property =============================";
7496
7497    std::vector<std::optional<numpy::datetime64>> values = {
7498        make_dt(0),                    // Minute 0
7499        make_dt(30 * NS_PER_MIN),      // Minute 30
7500        make_dt(59 * NS_PER_MIN)       // Minute 59
7501    };
7502    pandas::DatetimeArray arr(values);
7503    pandas::DatetimeIndex idx(arr);
7504
7505    auto minutes = idx.minute();
7506
7507    bool passed = (minutes.size() == 3);
7508    auto m0 = minutes[0];
7509    auto m1 = minutes[1];
7510    auto m2 = minutes[2];
7511    passed = passed && m0.has_value() && *m0 == 0;
7512    passed = passed && m1.has_value() && *m1 == 30;
7513    passed = passed && m2.has_value() && *m2 == 59;
7514
7515    if (!passed) {
len (pd_test_3_all.cpp:20867)
20857    auto title_result = s.str().title();
20858    if (title_result[0] != "Hello World" || title_result[1] != "Hello World" || title_result[2] != "Hello World") {
20859        std::cout << "  [FAIL] : title() failed" << std::endl;
20860        throw std::runtime_error("pd_test_str_capitalize_title: title() failed");
20861    }
20862
20863    std::cout << " -> tests passed" << std::endl;
20864}
20865
20866// ============================================================================
20867// Test str().len()
20868// ============================================================================
20869
20870void pd_test_str_len() {
20871    std::cout << "========= Series.str().len() ============================";
20872
20873    pandas::Series<std::string> s({"a", "bb", "ccc", ""});
20874
20875    auto lens = s.str().len();
20876    if (lens[0] != 1 || lens[1] != 2 || lens[2] != 3 || lens[3] != 0) {
20877        std::cout << "  [FAIL] : len() failed" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294        std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296        pandas::DatetimeArray arr(std::vector<std::string>{
1297            "2023-06-15",
1298            "NaT",
1299            "2023-01-01",
1300            "2023-12-31"
1301        });
1302
1303        // argsort ascending
1304        auto indices = arr.argsort(true, "last");
1305        // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
tz_convert (pd_test_2_all.cpp:17874)
17864        std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865        return 0;
17866    }
17867
17868} // namespace dataframe_tests
17869// ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871// ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872// dataframe_tests/pd_test_tz_convert.cpp
17873// Test for DataFrame.tz_convert() method
17874
17875#include <iostream>
17876#include <stdexcept>
17877#include <cmath>
17878#include "../pandas/pd_dataframe.h"
17879
17880namespace dataframe_tests {
17881    namespace dataframe_tests_tz_convert {
17882
17883        void pd_test_tz_convert_basic() {
tz_convert (pd_test_2_all.cpp:17874)
17864        std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865        return 0;
17866    }
17867
17868} // namespace dataframe_tests
17869// ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871// ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872// dataframe_tests/pd_test_tz_convert.cpp
17873// Test for DataFrame.tz_convert() method
17874
17875#include <iostream>
17876#include <stdexcept>
17877#include <cmath>
17878#include "../pandas/pd_dataframe.h"
17879
17880namespace dataframe_tests {
17881    namespace dataframe_tests_tz_convert {
17882
17883        void pd_test_tz_convert_basic() {
tz_localize (pd_test_1_all.cpp:1431)
1421            "2023-06-15"
1422        });
1423
1424        // Initially should be timezone-naive
1425        if (arr.is_tz_aware()) {
1426            std::cout << "  [FAIL] : array should be timezone-naive initially" << std::endl;
1427            throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428        }
1429
1430        // Localize to UTC
1431        auto localized = arr.tz_localize("UTC");
1432        if (!localized.is_tz_aware()) {
1433            std::cout << "  [FAIL] : localized array should be timezone-aware" << std::endl;
1434            throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435        }
1436
1437        // Verify timezone name in dtype
1438        auto dt = localized.dtype();
1439        if (!dt.is_tz_aware()) {
1440            std::cout << "  [FAIL] : dtype should be timezone-aware" << std::endl;
1441            throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
tz_localize (pd_test_1_all.cpp:1431)
1421            "2023-06-15"
1422        });
1423
1424        // Initially should be timezone-naive
1425        if (arr.is_tz_aware()) {
1426            std::cout << "  [FAIL] : array should be timezone-naive initially" << std::endl;
1427            throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428        }
1429
1430        // Localize to UTC
1431        auto localized = arr.tz_localize("UTC");
1432        if (!localized.is_tz_aware()) {
1433            std::cout << "  [FAIL] : localized array should be timezone-aware" << std::endl;
1434            throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435        }
1436
1437        // Verify timezone name in dtype
1438        auto dt = localized.dtype();
1439        if (!dt.is_tz_aware()) {
1440            std::cout << "  [FAIL] : dtype should be timezone-aware" << std::endl;
1441            throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
unique (pd_test_1_all.cpp:1345)
1335        pandas::DatetimeArray arr(std::vector<std::string>{
1336            "2023-01-01",
1337            "2023-06-15",
1338            "2023-01-01",
1339            "NaT",
1340            "2023-06-15",
1341            "NaT"
1342        });
1343
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
is_leap_year (pd_test_1_all.cpp:1280)
1270        }
1271
1272        // is_month_end
1273        auto me = arr.is_month_end();
1274        if (!me[1].has_value() || !me[1].value()) {
1275            std::cout << "  [FAIL] : 2023-03-31 should be month end" << std::endl;
1276            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: month end");
1277        }
1278
1279        // is_leap_year
1280        auto ly = arr.is_leap_year();
1281        if (!ly[2].has_value() || !ly[2].value()) {
1282            std::cout << "  [FAIL] : 2024 should be leap year" << std::endl;
1283            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: leap year");
1284        }
1285        if (!ly[0].has_value() || ly[0].value()) {
1286            std::cout << "  [FAIL] : 2023 should not be leap year" << std::endl;
1287            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: not leap year");
1288        }
1289
1290        std::cout << " -> tests passed" << std::endl;
is_month_end (pd_test_1_all.cpp:1273)
1263        }
1264
1265        // is_month_start
1266        auto ms = arr.is_month_start();
1267        if (!ms[0].has_value() || !ms[0].value()) {
1268            std::cout << "  [FAIL] : 2023-01-01 should be month start" << std::endl;
1269            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: month start");
1270        }
1271
1272        // is_month_end
1273        auto me = arr.is_month_end();
1274        if (!me[1].has_value() || !me[1].value()) {
1275            std::cout << "  [FAIL] : 2023-03-31 should be month end" << std::endl;
1276            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: month end");
1277        }
1278
1279        // is_leap_year
1280        auto ly = arr.is_leap_year();
1281        if (!ly[2].has_value() || !ly[2].value()) {
1282            std::cout << "  [FAIL] : 2024 should be leap year" << std::endl;
1283            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: leap year");
is_month_start (pd_test_1_all.cpp:1266)
1256        if (!ys[0].has_value() || !ys[0].value()) {
1257            std::cout << "  [FAIL] : 2023-01-01 should be year start" << std::endl;
1258            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: year start");
1259        }
1260        if (!ys[1].has_value() || ys[1].value()) {
1261            std::cout << "  [FAIL] : 2023-03-31 should not be year start" << std::endl;
1262            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: not year start");
1263        }
1264
1265        // is_month_start
1266        auto ms = arr.is_month_start();
1267        if (!ms[0].has_value() || !ms[0].value()) {
1268            std::cout << "  [FAIL] : 2023-01-01 should be month start" << std::endl;
1269            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: month start");
1270        }
1271
1272        // is_month_end
1273        auto me = arr.is_month_end();
1274        if (!me[1].has_value() || !me[1].value()) {
1275            std::cout << "  [FAIL] : 2023-03-31 should be month end" << std::endl;
1276            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: month end");
is_na (pd_test_1_all.cpp:51)
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
55
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
is_quarter_end (pd_test_3_all.cpp:25056)
25046    };
25047    pandas::Series<numpy::datetime64> s(dates);
25048    pandas::DatetimeProperties<pandas::Series<numpy::datetime64>> dt(s);
25049    if (dt.has_nat()) throw std::runtime_error("has_nat should be false for clean series");
25050    auto ms = dt.is_month_start();
25051    if (ms[0] != true || ms[1] != false) throw std::runtime_error("is_month_start failed");
25052    auto me = dt.is_month_end();
25053    if (me[1] != true || me[0] != false) throw std::runtime_error("is_month_end failed");
25054    auto qs = dt.is_quarter_start();
25055    if (qs[0] != true || qs[1] != false) throw std::runtime_error("is_quarter_start failed");
25056    auto qe = dt.is_quarter_end();
25057    if (qe[2] != true || qe[0] != false) throw std::runtime_error("is_quarter_end failed");
25058    auto ys = dt.is_year_start();
25059    if (ys[0] != true || ys[1] != false) throw std::runtime_error("is_year_start failed");
25060    auto ye = dt.is_year_end();
25061    if (ye[3] != true || ye[0] != false) throw std::runtime_error("is_year_end failed");
25062    std::cout << " -> tests passed" << std::endl;
25063}
25064
25065void pd_test_dt_bool_na_with_nat() {
25066    std::cout << "========= pd_test_dt_bool_na: series with NaT ==========";
is_quarter_start (pd_test_3_all.cpp:25054)
25044        numpy::datetime64("2024-03-31"),
25045        numpy::datetime64("2024-12-31")
25046    };
25047    pandas::Series<numpy::datetime64> s(dates);
25048    pandas::DatetimeProperties<pandas::Series<numpy::datetime64>> dt(s);
25049    if (dt.has_nat()) throw std::runtime_error("has_nat should be false for clean series");
25050    auto ms = dt.is_month_start();
25051    if (ms[0] != true || ms[1] != false) throw std::runtime_error("is_month_start failed");
25052    auto me = dt.is_month_end();
25053    if (me[1] != true || me[0] != false) throw std::runtime_error("is_month_end failed");
25054    auto qs = dt.is_quarter_start();
25055    if (qs[0] != true || qs[1] != false) throw std::runtime_error("is_quarter_start failed");
25056    auto qe = dt.is_quarter_end();
25057    if (qe[2] != true || qe[0] != false) throw std::runtime_error("is_quarter_end failed");
25058    auto ys = dt.is_year_start();
25059    if (ys[0] != true || ys[1] != false) throw std::runtime_error("is_year_start failed");
25060    auto ye = dt.is_year_end();
25061    if (ye[3] != true || ye[0] != false) throw std::runtime_error("is_year_end failed");
25062    std::cout << " -> tests passed" << std::endl;
25063}
is_tz_aware (pd_test_1_all.cpp:1425)
1416    void pd_test_datetime_array_timezone() {
1417        std::cout << "========= DatetimeArray: timezone ======================= ";
1418
1419        pandas::DatetimeArray arr(std::vector<std::string>{
1420            "2023-01-01",
1421            "2023-06-15"
1422        });
1423
1424        // Initially should be timezone-naive
1425        if (arr.is_tz_aware()) {
1426            std::cout << "  [FAIL] : array should be timezone-naive initially" << std::endl;
1427            throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428        }
1429
1430        // Localize to UTC
1431        auto localized = arr.tz_localize("UTC");
1432        if (!localized.is_tz_aware()) {
1433            std::cout << "  [FAIL] : localized array should be timezone-aware" << std::endl;
1434            throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435        }
is_year_end (pd_test_3_all.cpp:25060)
25050    auto ms = dt.is_month_start();
25051    if (ms[0] != true || ms[1] != false) throw std::runtime_error("is_month_start failed");
25052    auto me = dt.is_month_end();
25053    if (me[1] != true || me[0] != false) throw std::runtime_error("is_month_end failed");
25054    auto qs = dt.is_quarter_start();
25055    if (qs[0] != true || qs[1] != false) throw std::runtime_error("is_quarter_start failed");
25056    auto qe = dt.is_quarter_end();
25057    if (qe[2] != true || qe[0] != false) throw std::runtime_error("is_quarter_end failed");
25058    auto ys = dt.is_year_start();
25059    if (ys[0] != true || ys[1] != false) throw std::runtime_error("is_year_start failed");
25060    auto ye = dt.is_year_end();
25061    if (ye[3] != true || ye[0] != false) throw std::runtime_error("is_year_end failed");
25062    std::cout << " -> tests passed" << std::endl;
25063}
25064
25065void pd_test_dt_bool_na_with_nat() {
25066    std::cout << "========= pd_test_dt_bool_na: series with NaT ==========";
25067    std::vector<numpy::datetime64> dates = {
25068        numpy::datetime64("2024-01-01"),
25069        numpy::datetime64(),  // NaT
25070        numpy::datetime64("2024-12-31")
is_year_start (pd_test_1_all.cpp:1255)
1245        std::cout << "========= DatetimeArray: boolean properties ======================= ";
1246
1247        pandas::DatetimeArray arr(std::vector<std::string>{
1248            "2023-01-01",   // year start, month start
1249            "2023-03-31",   // quarter end, month end
1250            "2024-02-29",   // leap year (2024 is leap year)
1251            "2023-12-31"    // year end, month end
1252        });
1253
1254        // is_year_start
1255        auto ys = arr.is_year_start();
1256        if (!ys[0].has_value() || !ys[0].value()) {
1257            std::cout << "  [FAIL] : 2023-01-01 should be year start" << std::endl;
1258            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: year start");
1259        }
1260        if (!ys[1].has_value() || ys[1].value()) {
1261            std::cout << "  [FAIL] : 2023-03-31 should not be year start" << std::endl;
1262            throw std::runtime_error("pd_test_datetime_array_boolean_props failed: not year start");
1263        }
1264
1265        // is_month_start
argmax (pd_test_1_all.cpp:1323)
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327        }
1328
1329        std::cout << " -> tests passed" << std::endl;
1330    }
1331
1332    void pd_test_datetime_array_unique() {
1333        std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
data (pd_test_1_all.cpp:9114)
9104        throw std::runtime_error("pd_test_datetime_mixin_default_constructor failed");
9105    }
9106
9107    std::cout << " -> tests passed" << std::endl;
9108}
9109
9110void pd_test_datetime_mixin_array_constructor() {
9111    std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113    // Create DatetimeArray with some values
9114    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9116    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9117    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9118
9119    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120    mask.setElementAt({0}, numpy::bool_(false));
9121    mask.setElementAt({1}, numpy::bool_(false));
9122    mask.setElementAt({2}, numpy::bool_(false));
9123
9124    pandas::DatetimeArray arr(data, mask);
day (pd_test_1_all.cpp:1193)
1183            std::cout << "  [FAIL] : month[0] should be 3" << std::endl;
1184            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: month[0]");
1185        }
1186        auto m1 = months[1];
1187        if (!m1.has_value() || m1.value() != 12) {
1188            std::cout << "  [FAIL] : month[1] should be 12" << std::endl;
1189            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: month[1]");
1190        }
1191
1192        // Day
1193        auto days = arr.day();
1194        auto d0 = days[0];
1195        if (!d0.has_value() || d0.value() != 15) {
1196            std::cout << "  [FAIL] : day[0] should be 15" << std::endl;
1197            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: day[0]");
1198        }
1199        auto d1 = days[1];
1200        if (!d1.has_value() || d1.value() != 25) {
1201            std::cout << "  [FAIL] : day[1] should be 25" << std::endl;
1202            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: day[1]");
1203        }
dayofweek (pd_test_1_all.cpp:7565)
7555    // 1970-01-01 was a Thursday (day 3)
7556    std::vector<std::optional<numpy::datetime64>> values = {
7557        make_dt(0),                // Thursday (3)
7558        make_dt(NS_PER_DAY),       // Friday (4)
7559        make_dt(2 * NS_PER_DAY),   // Saturday (5)
7560        make_dt(3 * NS_PER_DAY)    // Sunday (6)
7561    };
7562    pandas::DatetimeArray arr(values);
7563    pandas::DatetimeIndex idx(arr);
7564
7565    auto dow = idx.dayofweek();
7566
7567    bool passed = (dow.size() == 4);
7568    if (!passed) {
7569        std::cout << "  [FAIL] : in pd_test_datetime_index_dayofweek()" << std::endl;
7570        throw std::runtime_error("pd_test_datetime_index_dayofweek failed");
7571    }
7572
7573    std::cout << " -> tests passed" << std::endl;
7574}
dayofyear (pd_test_3_all.cpp:18582)
18572    auto seconds = s.dt().second();
18573    if (seconds[0] != 45 || seconds[1] != 30 || seconds[2] != 59) {
18574        std::cout << "  [FAIL] : second() failed" << std::endl;
18575        throw std::runtime_error("pd_test_dt_time_components: second() failed");
18576    }
18577
18578    std::cout << " -> tests passed" << std::endl;
18579}
18580
18581// ============================================================================
18582// Test dt().dayofweek(), dt().dayofyear(), dt().quarter()
18583// ============================================================================
18584
18585void pd_test_dt_derived_properties() {
18586    std::cout << "========= Series.dt().dayofweek/dayofyear/quarter() ======";
18587
18588    // 2020-01-01 is a Wednesday (dayofweek=2), dayofyear=1, Q1
18589    // 2020-07-04 is a Saturday (dayofweek=5), dayofyear=186, Q3
18590    pandas::Series<std::string> s({"2020-01-01", "2020-07-04"});
18591
18592    auto dow = s.dt().dayofweek();
dt (pd_test_3_all.cpp:18239)
18229    if (offset.freqstr() != "D") {
18230        std::cout << "  [FAIL] : Day freqstr() failed" << std::endl;
18231        throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232    }
18233    if (offset.name() != "Day") {
18234        std::cout << "  [FAIL] : Day name() failed" << std::endl;
18235        throw std::runtime_error("pd_test_day_offset: name() failed");
18236    }
18237
18238    // Test apply
18239    numpy::datetime64 dt("2020-01-15");
18240    auto result = offset.apply(dt);
18241    std::tm tm = result.toTm();
18242    if (tm.tm_mday != 20) {
18243        std::cout << "  [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244        throw std::runtime_error("pd_test_day_offset: apply() failed");
18245    }
18246
18247    std::cout << " -> tests passed" << std::endl;
18248}
dtype (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
empty (pd_test_1_all.cpp:941)
932#include "../pandas/pd_config.h"
933
934namespace dataframe_tests {
935
936namespace dataframe_tests_config {
937
938    void pd_test_config_version() {
939        std::cout << "========= df_config: version info ======================= ";
940        const char* version = pandas::DataFrameInfo::version();
941        if (version == nullptr || std::string(version).empty()) {
942            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
943            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
944        }
945        std::cout << "-> tests passed" << std::endl;
946    }
947
948    void pd_test_config_na_repr() {
949        std::cout << "========= df_config: NA representation ======================= ";
950        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
951        if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
1356            std::cout << "  [FAIL] : factorize: NaT code should be -1" << std::endl;
1357            throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1358        }
1359        // Same values should have same codes
1360        if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1361            std::cout << "  [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1362            throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1363        }
has_na (pd_test_1_all.cpp:61)
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
55
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
hour (pd_test_1_all.cpp:7476)
7466    std::cout << "========= hour property ===============================";
7467
7468    std::vector<std::optional<numpy::datetime64>> values = {
7469        make_dt(0),                    // Hour 0
7470        make_dt(6 * NS_PER_HOUR),      // Hour 6
7471        make_dt(23 * NS_PER_HOUR)      // Hour 23
7472    };
7473    pandas::DatetimeArray arr(values);
7474    pandas::DatetimeIndex idx(arr);
7475
7476    auto hours = idx.hour();
7477
7478    bool passed = (hours.size() == 3);
7479    auto h0 = hours[0];
7480    auto h1 = hours[1];
7481    auto h2 = hours[2];
7482    passed = passed && h0.has_value() && *h0 == 0;
7483    passed = passed && h1.has_value() && *h1 == 6;
7484    passed = passed && h2.has_value() && *h2 == 23;
7485
7486    if (!passed) {
month (pd_test_1_all.cpp:1180)
1170    void pd_test_datetime_array_component_month_day() {
1171        std::cout << "========= DatetimeArray: month/day components ======================= ";
1172
1173        pandas::DatetimeArray arr(std::vector<std::string>{
1174            "2023-03-15",
1175            "2023-12-25",
1176            "NaT"
1177        });
1178
1179        // Month
1180        auto months = arr.month();
1181        auto m0 = months[0];
1182        if (!m0.has_value() || m0.value() != 3) {
1183            std::cout << "  [FAIL] : month[0] should be 3" << std::endl;
1184            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: month[0]");
1185        }
1186        auto m1 = months[1];
1187        if (!m1.has_value() || m1.value() != 12) {
1188            std::cout << "  [FAIL] : month[1] should be 12" << std::endl;
1189            throw std::runtime_error("pd_test_datetime_array_component_month_day failed: month[1]");
1190        }
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
normalize (pd_test_1_all.cpp:8723)
8713void pd_test_datetime_mixin_normalize() {
8714    std::cout << "========= normalize ===================================";
8715
8716    // Create datetime with time component
8717    std::vector<std::optional<numpy::datetime64>> values = {
8718        numpy::datetime64(86400000000000LL + 3600000000000LL, numpy::DateTimeUnit::Nanosecond)  // 1 day + 1 hour
8719    };
8720    pandas::DatetimeArray arr(values);
8721    pandas::DatetimeMixinIndex idx(arr);
8722
8723    pandas::DatetimeMixinIndex normalized = idx.normalize();
8724
8725    bool passed = (normalized.size() == 1);
8726    if (!passed) {
8727        std::cout << "  [FAIL] : in pd_test_datetime_mixin_normalize()" << std::endl;
8728        throw std::runtime_error("pd_test_datetime_mixin_normalize failed");
8729    }
8730
8731    std::cout << " -> tests passed" << std::endl;
8732}
quarter (pd_test_1_all.cpp:1218)
1208    void pd_test_datetime_array_quarter() {
1209        std::cout << "========= DatetimeArray: quarter ======================= ";
1210
1211        pandas::DatetimeArray arr(std::vector<std::string>{
1212            "2023-01-15",  // Q1
1213            "2023-05-20",  // Q2
1214            "2023-09-10",  // Q3
1215            "2023-11-25"   // Q4
1216        });
1217
1218        auto quarters = arr.quarter();
1219
1220        auto q0 = quarters[0];
1221        if (!q0.has_value() || q0.value() != 1) {
1222            std::cout << "  [FAIL] : quarter[0] should be 1" << std::endl;
1223            throw std::runtime_error("pd_test_datetime_array_quarter failed: quarter[0]");
1224        }
1225        auto q1 = quarters[1];
1226        if (!q1.has_value() || q1.value() != 2) {
1227            std::cout << "  [FAIL] : quarter[1] should be 2" << std::endl;
1228            throw std::runtime_error("pd_test_datetime_array_quarter failed: quarter[1]");
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
second (pd_test_1_all.cpp:7534)
7524    std::cout << "========= second property =============================";
7525
7526    std::vector<std::optional<numpy::datetime64>> values = {
7527        make_dt(0),                    // Second 0
7528        make_dt(30 * NS_PER_SEC),      // Second 30
7529        make_dt(59 * NS_PER_SEC)       // Second 59
7530    };
7531    pandas::DatetimeArray arr(values);
7532    pandas::DatetimeIndex idx(arr);
7533
7534    auto seconds = idx.second();
7535
7536    bool passed = (seconds.size() == 3);
7537    auto s0 = seconds[0];
7538    auto s1 = seconds[1];
7539    auto s2 = seconds[2];
7540    passed = passed && s0.has_value() && *s0 == 0;
7541    passed = passed && s1.has_value() && *s1 == 30;
7542    passed = passed && s2.has_value() && *s2 == 59;
7543
7544    if (!passed) {
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
tz (pd_test_2_all.cpp:17914)
17904            pandas::DataFrame df(data);
17905            df.set_index(std::make_unique<pandas::DatetimeIndex>(tz_aware_idx));
17906
17907            // Verify the index is timezone-aware
17908            const pandas::DatetimeIndex* original_idx = dynamic_cast<const pandas::DatetimeIndex*>(&df.index());
17909            if (!original_idx) {
17910                std::cout << "  [FAIL] : in pd_test_tz_convert_basic() : index is not DatetimeIndex" << std::endl;
17911                throw std::runtime_error("pd_test_tz_convert_basic failed: index is not DatetimeIndex");
17912            }
17913
17914            std::string original_tz = original_idx->tz();
17915            if (original_tz.empty()) {
17916                std::cout << "  [FAIL] : in pd_test_tz_convert_basic() : original index is not timezone-aware" << std::endl;
17917                throw std::runtime_error("pd_test_tz_convert_basic failed: original index is not timezone-aware");
17918            }
17919
17920            // Convert to Asia/Shanghai timezone
17921            pandas::DataFrame df_shanghai = df.tz_convert("Asia/Shanghai");
17922
17923            // Verify result has a DatetimeIndex
17924            const pandas::DatetimeIndex* converted_idx = dynamic_cast<const pandas::DatetimeIndex*>(&df_shanghai.index());
unit (pd_test_1_all.cpp:9284)
9274    data.setElementAt({0}, numpy::datetime64(1000LL, numpy::DateTimeUnit::Nanosecond));
9275    data.setElementAt({1}, numpy::datetime64(2000LL, numpy::DateTimeUnit::Nanosecond));
9276
9277    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
9278    mask.setElementAt({0}, numpy::bool_(false));
9279    mask.setElementAt({1}, numpy::bool_(false));
9280
9281    pandas::DatetimeArray arr(data, mask);
9282    pandas::DatetimeTDMixin idx(arr);
9283
9284    std::string unit = idx.unit();
9285
9286    bool passed = (unit == "ns");  // nanosecond
9287    if (!passed) {
9288        std::cout << "  [FAIL] : in pd_test_datetime_unit_property() : unit property check failed, got '" << unit << "'" << std::endl;
9289        throw std::runtime_error("pd_test_datetime_unit_property failed");
9290    }
9291
9292    std::cout << " -> tests passed" << std::endl;
9293}
year (pd_test_1_all.cpp:1147)
1138    void pd_test_datetime_array_component_year() {
1139        std::cout << "========= DatetimeArray: year component ======================= ";
1140
1141        pandas::DatetimeArray arr(std::vector<std::string>{
1142            "2020-01-15",
1143            "NaT",
1144            "2025-06-20"
1145        });
1146
1147        auto years = arr.year();
1148
1149        auto y0 = years[0];
1150        if (!y0.has_value() || y0.value() != 2020) {
1151            std::cout << "  [FAIL] : year[0] should be 2020" << std::endl;
1152            throw std::runtime_error("pd_test_datetime_array_component_year failed: year[0]");
1153        }
1154
1155        auto y1 = years[1];
1156        if (y1.has_value()) {
1157            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;