NDArrayBackedExtensionIndex#

class pandas::NDArrayBackedExtensionIndex#

Index class for axis labels in pandas data structures whose values are stored in a backing array of type `ArrayType` (see the constructors below).

Example#

#include <pandas/pandas.h>
using namespace pandas;

// Create NDArrayBackedExtensionIndex
NDArrayBackedExtensionIndex<int64_t> idx({1, 2, 3}, "my_index");
size_t len = idx.size();

Constructors#

Signature

Location

Example

explicit NDArrayBackedExtensionIndex(const ArrayType& array, const std::optional<std::string>& name = std::nullopt, bool copy = false)

pd_ndarray_backed_extension_index.h:141

explicit NDArrayBackedExtensionIndex(ArrayType&& array, const std::optional<std::string>& name = std::nullopt)

pd_ndarray_backed_extension_index.h:151

NDArrayBackedExtensionIndex(const NDArrayBackedExtensionIndex& other)

pd_ndarray_backed_extension_index.h:160

NDArrayBackedExtensionIndex(NDArrayBackedExtensionIndex&& other) noexcept

pd_ndarray_backed_extension_index.h:168

Indexing / Selection#

Signature

Return Type

Location

Example

std::string get_value_str(size_t index) const override

std::string

pd_ndarray_backed_extension_index.h:705

View

Data Manipulation#

Signature

Return Type

Location

Example

NDArrayBackedExtensionIndex rename(const std::optional<std::string>& new_name) const

NDArrayBackedExtensionIndex

pd_ndarray_backed_extension_index.h:757

View

I/O#

Signature

Return Type

Location

Example

std::string to_string() const override

std::string

pd_ndarray_backed_extension_index.h:618

View

Conversion#

Signature

Return Type

Location

Example

NDArrayBackedExtensionIndex copy() const

NDArrayBackedExtensionIndex

pd_ndarray_backed_extension_index.h:750

View

Type Checking#

Signature

Return Type

Location

Example

bool is_all_dates() const

bool

pd_ndarray_backed_extension_index.h:489

View

Other Methods#

Signature

Return Type

Location

Example

DatetimeArray arr(data, mask)

DatetimeArray

pd_ndarray_backed_extension_index.h:279

View

TimedeltaArray arr(data, mask)

TimedeltaArray

pd_ndarray_backed_extension_index.h:296

View

DatetimeArray arr(data, mask)

DatetimeArray

pd_ndarray_backed_extension_index.h:349

View

TimedeltaArray arr(data, mask)

TimedeltaArray

pd_ndarray_backed_extension_index.h:365

View

PeriodArray arr(data, mask, "D")

PeriodArray

pd_ndarray_backed_extension_index.h:379

View

PeriodArray arr(data, mask, freq)

PeriodArray

pd_ndarray_backed_extension_index.h:405

View

size_t cache_memory_usage() const override

size_t

pd_ndarray_backed_extension_index.h:957

void clear_cache() const override

void

pd_ndarray_backed_extension_index.h:938

View

std::unique_ptr<IndexBase> clone() const override

std::unique_ptr<IndexBase>

pd_ndarray_backed_extension_index.h:607

View

static std::string format_datetime(const numpy::datetime64& val)

static std::string

pd_ndarray_backed_extension_index.h:771

std::string format_period(numpy::int64 ordinal) const

std::string

pd_ndarray_backed_extension_index.h:920

static std::string format_timedelta(const numpy::timedelta64& val)

static std::string

pd_ndarray_backed_extension_index.h:846

std::string freq_string() const

std::string

pd_ndarray_backed_extension_index.h:566

View

bool has_cached_values() const override

bool

pd_ndarray_backed_extension_index.h:948

View

std::string inferred_type() const override

std::string

pd_ndarray_backed_extension_index.h:593

View

PeriodArray arr(data, mask, period_freq_to_string(current_arr.freq()))

PeriodArray

pd_ndarray_backed_extension_index.h:312

std::string repr() const override

std::string

pd_ndarray_backed_extension_index.h:698

View

std::string resolution() const

std::string

pd_ndarray_backed_extension_index.h:427

View

NDArrayBackedExtensionIndex result(*this)

NDArrayBackedExtensionIndex

pd_ndarray_backed_extension_index.h:758

View

IndexTypeId type_id() const override

IndexTypeId

pd_ndarray_backed_extension_index.h:611

View

std::string unit() const

std::string

pd_ndarray_backed_extension_index.h:528

View

static std::string unit_to_string(numpy::DateTimeUnit unit)

static std::string

pd_ndarray_backed_extension_index.h:543

Internal Methods#

This class also has 6 internal methods (names prefixed with an underscore), which are not listed on this page.

Code Examples#

The following examples are extracted from the test suite.

get_value_str (pd_test_1_all.cpp:4665)
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670            }
4671
4672            // A-B correlation should be 1.0 (perfect correlation)
4673            std::string ab = corr_df["B"].get_value_str(0);
4674            passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675            if (!passed) {
rename (pd_test_1_all.cpp:5816)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
is_all_dates (pd_test_1_all.cpp:15606)
15596    bool passed = (res == "day");
15597    if (!passed) {
15598        std::cout << "  [FAIL] : in pd_test_period_index_resolution() : resolution check failed, got: " << res << std::endl;
15599        throw std::runtime_error("pd_test_period_index_resolution failed");
15600    }
15601
15602    std::cout << " -> tests passed" << std::endl;
15603}
15604
15605// ============================================================================
15606// is_all_dates() Tests
15607// ============================================================================
15608
15609void pd_test_is_all_dates() {
15610    std::cout << "========= is_all_dates() =========================";
15611
15612    // DatetimeIndex
15613    numpy::NDArray<numpy::datetime64> dt_data(std::vector<size_t>{1});
15614    dt_data.setElementAt({0}, numpy::datetime64(1000LL, numpy::DateTimeUnit::Nanosecond));
15615    numpy::NDArray<numpy::bool_> dt_mask(std::vector<size_t>{1});
15616    dt_mask.setElementAt({0}, numpy::bool_(false));
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
clear_cache (pd_test_1_all.cpp:19413)
19403            s.mean();
19404            s.min();
19405            s.max();
19406
19407            passed = s.has_cached_values() == true;
19408            if (!passed) {
19409                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not populated" << std::endl;
19410                throw std::runtime_error("pd_test_series_cache failed: cache not populated");
19411            }
19412
19413            s.clear_cache();
19414            passed = s.has_cached_values() == false;
19415            if (!passed) {
19416                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not cleared" << std::endl;
19417                throw std::runtime_error("pd_test_series_cache failed: cache not cleared");
19418            }
19419
19420            std::cout << " -> tests passed" << std::endl;
19421        }
19422
19423        void pd_test_series_string_repr() {
clone (pd_test_1_all.cpp:5776)
5766    std::cout << " -> tests passed" << std::endl;
5767}
5768
5769void pd_test_categorical_index_clone() {
5770    std::cout << "========= clone =======================================";
5771
5772    pandas::CategoricalArray arr({"p", "q", "r"});
5773    pandas::CategoricalIndex idx(arr, "original");
5774
5775    std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5776
5777    bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5778                   cloned->name() == idx.name());
5779    if (!passed) {
5780        std::cout << "  [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5781        throw std::runtime_error("pd_test_categorical_index_clone failed");
5782    }
5783
5784    std::cout << " -> tests passed" << std::endl;
5785}
freq_string (pd_test_1_all.cpp:15733)
15723    bool passed = (isnan_mask.size() == 2);
15724    if (!passed) {
15725        std::cout << "  [FAIL] : in pd_test_period_index_isnan() : _isnan check failed" << std::endl;
15726        throw std::runtime_error("pd_test_period_index_isnan failed");
15727    }
15728
15729    std::cout << " -> tests passed" << std::endl;
15730}
15731
15732// ============================================================================
15733// unit() and freq_string() Tests
15734// ============================================================================
15735
15736void pd_test_unit_freq_string() {
15737    std::cout << "========= unit() / freq_string() =========================";
15738
15739    // PeriodIndex with monthly frequency
15740    numpy::NDArray<numpy::int64> data(std::vector<size_t>{1});
15741    data.setElementAt({0}, numpy::int64(0));
15742
15743    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{1});
has_cached_values (pd_test_1_all.cpp:19395)
19385            }
19386
19387            std::cout << " -> tests passed" << std::endl;
19388        }
19389
19390        void pd_test_series_cache() {
19391            std::cout << "========= cache management =========================================";
19392
19393            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
19394
19395            bool passed = s.has_cached_values() == false;
19396            if (!passed) {
19397                std::cout << "  [FAIL] : in pd_test_series_cache() : initial cache not empty" << std::endl;
19398                throw std::runtime_error("pd_test_series_cache failed: initial cache not empty");
19399            }
19400
19401            // Trigger cache
19402            s.sum();
19403            s.mean();
19404            s.min();
19405            s.max();
inferred_type (pd_test_1_all.cpp:5270)
5260}
5261
5262void pd_test_categorical_index_array_constructor() {
5263    std::cout << "========= array constructor ===========================";
5264
5265    pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
5266    pandas::CategoricalIndex idx(arr, "fruits");
5267
5268    bool passed = (idx.size() == 4 && !idx.empty() &&
5269                   idx.name().has_value() && *idx.name() == "fruits" &&
5270                   idx.inferred_type() == "categorical");
5271    if (!passed) {
5272        std::cout << "  [FAIL] : in pd_test_categorical_index_array_constructor()" << std::endl;
5273        throw std::runtime_error("pd_test_categorical_index_array_constructor failed");
5274    }
5275
5276    std::cout << " -> tests passed" << std::endl;
5277}
5278
5279void pd_test_categorical_index_values_constructor() {
5280    std::cout << "========= values constructor ==========================";
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
resolution (pd_test_1_all.cpp:15536)
15526    }
15527
15528    std::cout << " -> tests passed" << std::endl;
15529}
15530
15531// ============================================================================
15532// Resolution Tests
15533// ============================================================================
15534
15535void pd_test_datetime_index_resolution() {
15536    std::cout << "========= DatetimeIndex resolution() =========================";
15537
15538    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{1});
15539    data.setElementAt({0}, numpy::datetime64(1000LL, numpy::DateTimeUnit::Nanosecond));
15540
15541    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{1});
15542    mask.setElementAt({0}, numpy::bool_(false));
15543
15544    pandas::DatetimeArray arr(data, mask);
15545    pandas::DatetimeIndexBase idx(arr);
result (pd_test_1_all.cpp:15406)
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
type_id (pd_test_3_all.cpp:25592)
25582// ------------------- pd_test_value_classify (end) ------------------
25583
25584// ------------------- pd_test_index_type_id (start) ------------------
25585namespace dataframe_tests_index_type_id {
25586
25587void pd_test_index_type_id_dispatch() {
25588    std::cout << "========= IndexTypeId dispatch =======================";
25589
25590    // RangeIndex
25591    ::pandas::RangeIndex ri(0, 5);
25592    if (ri.type_id() != ::pandas::IndexTypeId::RangeIndex)
25593        throw std::runtime_error("RangeIndex type_id failed");
25594
25595    // Index<string>
25596    ::pandas::Index<std::string> si(std::vector<std::string>{"a", "b", "c"});
25597    if (si.type_id() != ::pandas::IndexTypeId::IndexString)
25598        throw std::runtime_error("Index<string> type_id failed");
25599
25600    // Index<int64>
25601    ::pandas::Index<numpy::int64> ii(std::vector<numpy::int64>{1, 2, 3});
25602    if (ii.type_id() != ::pandas::IndexTypeId::IndexInt64)
unit (pd_test_1_all.cpp:9284)
9274    data.setElementAt({0}, numpy::datetime64(1000LL, numpy::DateTimeUnit::Nanosecond));
9275    data.setElementAt({1}, numpy::datetime64(2000LL, numpy::DateTimeUnit::Nanosecond));
9276
9277    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
9278    mask.setElementAt({0}, numpy::bool_(false));
9279    mask.setElementAt({1}, numpy::bool_(false));
9280
9281    pandas::DatetimeArray arr(data, mask);
9282    pandas::DatetimeTDMixin idx(arr);
9283
9284    std::string unit = idx.unit();
9285
9286    bool passed = (unit == "ns");  // nanosecond
9287    if (!passed) {
9288        std::cout << "  [FAIL] : in pd_test_datetime_unit_property() : unit property check failed, got '" << unit << "'" << std::endl;
9289        throw std::runtime_error("pd_test_datetime_unit_property failed");
9290    }
9291
9292    std::cout << " -> tests passed" << std::endl;
9293}