NDFrameBase#

class pandas::NDFrameBase#

Core data container class in the pandas namespace.

Example#

#include <pandas/pandas.h>
using namespace pandas;

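// NDFrameBase is abstract (it declares pure virtual members), so it is used
// through a concrete subclass such as Series.
Series<double> s({1.0, 2.0, 3.0});
const NDFrameBase& obj = s;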
// ... operations ...

Construction#

Signature

Return Type

Location

Example

virtual std::unique_ptr<NDFrameBase> create_nan_filled(size_t n) const = 0

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:477

Indexing / Selection#

Signature

Return Type

Location

Example

virtual const Attrs& attrs() const = 0

virtual const Attrs&

pd_ndframe_base.h:173

View

virtual Attrs& attrs() = 0

virtual Attrs&

pd_ndframe_base.h:179

View

virtual const std::vector<std::string>& get_cat_categories() const

virtual const std::vector<std::string>&

pd_ndframe_base.h:422

View

virtual std::string get_cat_categories_dtype() const

virtual std::string

pd_ndframe_base.h:446

virtual bool get_value_bool(size_t idx) const = 0

virtual bool

pd_ndframe_base.h:351

View

virtual double get_value_double(size_t idx) const = 0

virtual double

pd_ndframe_base.h:282

View

virtual std::string get_value_str(size_t idx) const = 0

virtual std::string

pd_ndframe_base.h:256

View

virtual bool mask_at(size_t) const

virtual bool

pd_ndframe_base.h:145

View

virtual void set_value_double(size_t idx, double value) = 0

virtual void

pd_ndframe_base.h:364

virtual void set_value_nan(size_t idx) = 0

virtual void

pd_ndframe_base.h:357

View

virtual void set_value_str(size_t idx, const std::string& value)

virtual void

pd_ndframe_base.h:371

virtual std::unique_ptr<NDFrameBase> take_indices(const std::vector<size_t>& indices) const = 0

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:494

Data Manipulation#

Signature

Return Type

Location

Example

virtual std::unique_ptr<NDFrameBase> reindex_with_indexer(const numpy::NDArray<numpy::int64>& indexer) const = 0

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:502

View

virtual std::unique_ptr<NDFrameBase> reindex_with_indexer_as( const std::string& target_dtype, const numpy::NDArray<numpy::int64>& indexer) const

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:527

virtual void replace_value(double to_replace, double value) = 0

virtual void

pd_ndframe_base.h:344

virtual void set_index(std::unique_ptr<IndexBase> new_index) = 0

virtual void

pd_ndframe_base.h:232

View

Missing Data#

Signature

Return Type

Location

Example

virtual void fillna_double(double value) = 0

virtual void

pd_ndframe_base.h:330

virtual void fillna_string(const std::string& value)

virtual void

pd_ndframe_base.h:337

View

Statistics#

Signature

Return Type

Location

Example

virtual size_t count() const = 0

virtual size_t

pd_ndframe_base.h:317

View

virtual int max_decimal_places() const

virtual int

pd_ndframe_base.h:268

double sum() const

double

pd_ndframe_base.h:288

View

Reshaping#

Signature

Return Type

Location

Example

DataFrame unstack(int level = -1) const

DataFrame

pd_ndframe_base.h:470

View

Combining#

Signature

Return Type

Location

Example

virtual std::unique_ptr<NDFrameBase> concat_with(const NDFrameBase& other) const = 0

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:487

I/O#

Signature

Return Type

Location

Example

virtual std::string to_string() const = 0

virtual std::string

pd_ndframe_base.h:544

View

virtual std::vector<std::string> to_string_vector() const = 0

virtual std::vector<std::string>

pd_ndframe_base.h:301

View

Conversion#

Signature

Return Type

Location

Example

virtual std::unique_ptr<NDFrameBase> astype_dtype(const std::string& dtype_str) const

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:513

View

void copy_frame_flags_from(const NDFrameBase& src)

void

pd_ndframe_base.h:591

virtual void copy_value_from(size_t src_idx, size_t dst_idx) = 0

virtual void

pd_ndframe_base.h:384

Type Checking#

Signature

Return Type

Location

Example

bool is_bool_dtype() const

bool

pd_ndframe_base.h:110

virtual bool is_na_at(size_t idx) const = 0

virtual bool

pd_ndframe_base.h:324

View

Other Methods#

Signature

Return Type

Location

Example

virtual bool all(int axis = 0, bool bool_only = false, bool skipna = true) const = 0

virtual bool

pd_ndframe_base.h:397

View

virtual bool all_values_whole_number() const

virtual bool

pd_ndframe_base.h:262

View

virtual bool any(int axis = 0, bool bool_only = false, bool skipna = true) const = 0

virtual bool

pd_ndframe_base.h:406

View

virtual std::vector<const IndexBase*> axes() const = 0

virtual std::vector<const IndexBase*>

pd_ndframe_base.h:245

View

size_t cache_memory_usage() const override

size_t

pd_ndframe_base.h:569

virtual bool cat_ordered() const

virtual bool

pd_ndframe_base.h:436

View

void clear_cache() const override = 0

void

pd_ndframe_base.h:559

View

virtual void clear_dtype_override()

virtual void

pd_ndframe_base.h:124

virtual std::unique_ptr<NDFrameBase> clone() const = 0

virtual std::unique_ptr<NDFrameBase>

pd_ndframe_base.h:461

View

virtual std::string dtype_name() const = 0

virtual std::string

pd_ndframe_base.h:99

View

virtual std::string dtype_name_full() const

virtual std::string

pd_ndframe_base.h:105

View

virtual bool empty() const = 0

virtual bool

pd_ndframe_base.h:93

View

virtual const Flags& flags() const = 0

virtual const Flags&

pd_ndframe_base.h:191

View

bool has_cached_values() const override = 0

bool

pd_ndframe_base.h:564

View

virtual bool has_cat_categories() const

virtual bool

pd_ndframe_base.h:416

View

virtual bool has_mask() const

virtual bool

pd_ndframe_base.h:140

View

virtual bool hasnans() const = 0

virtual bool

pd_ndframe_base.h:311

View

virtual const IndexBase& index() const = 0

virtual const IndexBase&

pd_ndframe_base.h:225

View

virtual std::optional<std::string> name() const

virtual std::optional<std::string>

pd_ndframe_base.h:209

View

virtual size_t nbytes() const = 0

virtual size_t

pd_ndframe_base.h:163

View

virtual size_t ndim() const = 0

virtual size_t

pd_ndframe_base.h:157

View

virtual std::string repr() const = 0

virtual std::string

pd_ndframe_base.h:550

View

virtual void set_attrs(const Attrs& attrs) = 0

virtual void

pd_ndframe_base.h:185

virtual void set_cat_categories(const std::vector<std::string>& /*cats*/)

virtual void

pd_ndframe_base.h:431

View

virtual void set_cat_categories_dtype(const std::string& /*dtype*/)

virtual void

pd_ndframe_base.h:451

virtual void set_cat_ordered(bool /*ordered*/)

virtual void

pd_ndframe_base.h:441

View

virtual void set_dtype_override(const std::string&)

virtual void

pd_ndframe_base.h:119

View

virtual void set_flags(const Flags& flags, bool copy = true, bool allows_duplicate_labels = true) = 0

virtual void

pd_ndframe_base.h:199

View

virtual void set_name(const std::optional<std::string>& /*name*/)

virtual void

pd_ndframe_base.h:215

View

virtual void set_sparse_fill_value(double)

virtual void

pd_ndframe_base.h:135

void set_string_na_sentinel_disabled(bool v)

void

pd_ndframe_base.h:583

View

virtual std::vector<size_t> shape() const = 0

virtual std::vector<size_t>

pd_ndframe_base.h:151

View

virtual size_t size() const = 0

virtual size_t

pd_ndframe_base.h:87

View

bool string_na_sentinel_disabled() const

bool

pd_ndframe_base.h:584

View

Code Examples#

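The following examples are extracted from the test suite. Each listing is headed by the method name and the test file location it was taken from, and every line is prefixed with its line number in that test file. The excerpts appear to be matched by method name only, so some of them exercise a same-named function on another type (for example BooleanArray, CategoricalIndex, or std::to_string) rather than the NDFrameBase member itself.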

attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
get_cat_categories (pd_test_2_all.cpp:20374)
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383    // Test DatetimeIndex freq propagation
20384    pandas::DataFrame df;
get_value_bool (pd_test_5_all.cpp:35197)
35187    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35188    pandas_tests::check(df["X"].get_value_double(0) == 1.0, "case_2.idx0_one", local_fail);
35189    pandas_tests::check(std::isnan(df["X"].get_value_double(1)),
35190                        "case_2.idx1_nan", local_fail);
35191    pandas_tests::check(df["X"].get_value_double(2) == 0.0, "case_2.idx2_zero", local_fail);
35192}
35193
35194void bool_nullable_826495_case_3_get_value_bool_mask_aware(int& local_fail) {
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
get_value_double (pd_test_2_all.cpp:19160)
19150    std::map<std::string, std::string> col_funcs;
19151    col_funcs["a"] = "sum";
19152    col_funcs["b"] = "mean";
19153
19154    pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19155
19156    // a.sum() = 10.0, b.mean() = 25.0
19157    check(result.size() == 2, "result_size_2");
19158
19159    // std::map iterates in alphabetical order: a, b
19160    check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19161    check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19162
19163    // Check index labels
19164    check(result.index().get_value_str(0) == "a", "index_0_a");
19165    check(result.index().get_value_str(1) == "b", "index_1_b");
19166}
19167
19168void pd_test_agg_dispatch_dict_simple_single_col() {
19169    std::cout << "  -- pd_test_agg_dispatch_dict_simple_single_col --" << std::endl;
get_value_str (pd_test_1_all.cpp:4665)
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670            }
4671
4672            // A-B correlation should be 1.0 (perfect correlation)
4673            std::string ab = corr_df["B"].get_value_str(0);
4674            passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675            if (!passed) {
mask_at (pd_test_3_all.cpp:27712)
27702        fail++;
27703    } else {
27704        if (bool_s->dtype_name() != "boolean") {
27705            std::cout << "    FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27706            fail++;
27707        }
27708        if (!bool_s->has_mask()) {
27709            std::cout << "    FAIL: should have mask for NA" << std::endl;
27710            fail++;
27711        } else {
27712            if (!bool_s->mask_at(2)) {
27713                std::cout << "    FAIL: position 2 should be masked (NA)" << std::endl;
27714                fail++;
27715            }
27716        }
27717    }
27718
27719    if (fail == 0) std::cout << "    OK" << std::endl;
27720}
27721
27722void pd_test_astype_to_string() {
set_value_nan (pd_test_5_all.cpp:18478)
18468        "0    a\n"
18469        "1  NaN\n"
18470        "2    c";
18471    bool ok = (actual == expected);
18472    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_10_str_col_where_default.to_string", local_fail);
18473    if (!ok) dump_diff("case_10", expected, actual);
18474}
18475
18476void where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN(int& local_fail) {
18477    pandas::Series<std::int64_t> s({10, 20, 30});
18478    s.set_value_nan(0);
18479
18480    std::string actual = s.get_value_str(0);
18481    std::string expected = "NaN";
18482    bool ok = (actual == expected);
18483    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN (got " +
18484          actual + ")", local_fail);
18485
18486    bool ok1 = (s.get_value_str(1) == "20");
18487    bool ok2 = (s.get_value_str(2) == "30");
18488    pandas_tests::check(ok1, "case_11.kept_idx1_eq_20", local_fail);
reindex_with_indexer (pd_test_5_all.cpp:40388)
40378    s.set_dtype_override("boolean");
40379    s.set_freq(std::optional<std::string>("D"));
40380    s.set_string_na_sentinel_disabled(true);
40381
40382    // Indexer: identity over the 3 source positions.
40383    numpy::NDArray<numpy::int64> indexer(std::vector<size_t>{3});
40384    indexer.setElementAt({0}, 0);
40385    indexer.setElementAt({1}, 1);
40386    indexer.setElementAt({2}, 2);
40387
40388    auto base = s.reindex_with_indexer(indexer);
40389    pandas_tests::check(base != nullptr, "case7.reindex_with_indexer_nonnull", local_fail);
40390    if (!base) return;
40391
40392    auto* r = dynamic_cast<pandas::Series<std::int64_t>*>(base.get());
40393    pandas_tests::check(r != nullptr, "case7.reindex_with_indexer_is_Series_int64",
40394                        local_fail);
40395    if (!r) return;
40396
40397    // dtype_override propagates (oracle says yes).
40398    pandas_tests::check(r->dtype_override().has_value() &&
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
fillna_string (pd_test_5_all.cpp:47965)
47955                                            "NaT", "null", "<NA>", "x", ""});
47956        auto& col = df["col"];
47957        for (size_t r = 0; r < df.nrows(); ++r) {
47958            std::cout << tag << "  [" << r << "] val=\""
47959                      << col.get_value_str(r) << "\" is_na_at="
47960                      << col.is_na_at(r) << "\n";
47961        }
47962        // CROSS-REFERENCE: pd_series.h:1938 lists only ""/None/nan/NaN as NA
47963        // for Series<std::string>; "NA"/"NaT"/"null"/"<NA>" are NOT treated
47964        // as NA by is_na_at.  This interacts with the fillna bug (item #1):
47965        // fillna_string (pd_series.h:1995) shares the SAME list.
47966    } catch (const std::exception& e) {
47967        std::cout << tag << " exception: " << e.what() << "\n";
47968    }
47969    std::cout << tag << " === end ===\n";
47970}
47971
47972static void P33_forced_object_sentinels() {
47973    const std::string tag = "[P33]";
47974    std::cout << "\n" << tag
47975              << " === dtype='object' with 'NaT'/'null' literals (residual bug?) ===\n";
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
unstack (pd_test_3_all.cpp:1739)
1729    }
1730    if (s.size() != 3) {
1731        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case H size" << std::endl;
1732        throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case H size");
1733    }
1734
1735    std::cout << " -> tests passed" << std::endl;
1736}
1737
1738void pd_test_3_all_dataframe_unstack() {
1739    std::cout << "========= DataFrame.unstack() ========================";
1740
1741    std::map<std::string, std::vector<double>> data = {
1742        {"A", {1.0, 2.0, 3.0}},
1743        {"B", {4.0, 5.0, 6.0}}
1744    };
1745    pandas::DataFrame df(data);
1746
1747    // Without MultiIndex, unstack() returns self (matches pandas behavior)
1748    pandas::DataFrame result = df.unstack();
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
to_string_vector (pd_test_1_all.cpp:10871)
10861    std::cout << " -> tests passed" << std::endl;
10862}
10863
10864void pd_test_extension_index_to_string_vector() {
10865    std::cout << "========= to_string_vector =========================";
10866
10867    pandas::CategoricalArray arr({"a", std::nullopt, "c"});
10868    pandas::CategoricalIndex idx(arr);
10869
10870    auto str_vec = idx.to_string_vector();
10871
10872    bool passed = (str_vec.size() == 3 &&
10873                   str_vec[0] == "a" && str_vec[1] == "NA" && str_vec[2] == "c");
10874    if (!passed) {
10875        std::cout << "  [FAIL] : in pd_test_extension_index_to_string_vector() : to_string_vector check failed" << std::endl;
10876        throw std::runtime_error("pd_test_extension_index_to_string_vector failed");
10877    }
10878
10879    std::cout << " -> tests passed" << std::endl;
10880}
astype_dtype (pd_test_5_all.cpp:43633)
43623        "0  a\n"
43624        "1  b\n"
43625        "2  c";
43626    check_case("dtype_extension_dt_complex_fallback_925116_case_6",
43627               df, actual, expected, "string", local_fail);
43628}
43629
43630void f_dtype_extension_dt_complex_fallback_925116_case_7_series_string_astype_string_drops_override(int& local_fail) {
43631    std::cout << "-- case_7_series_string_astype_string_drops_override\n";
43632    pandas::Series<std::string> s({"a", "b", "c"});
43633    auto r_box = s.astype_dtype("string");
43634    auto* r = dynamic_cast<pandas::Series<std::string>*>(r_box.get());
43635    if (r == nullptr) {
43636        pandas_tests::check(false, "case_7.astype_returned_non_string_series", local_fail);
43637        return;
43638    }
43639    pandas::DataFrame df = r->to_frame(std::optional<std::string>("v"));
43640    std::string actual = df.to_string();
43641
43642    std::cout << "    src_dtype=" << show_dtype(s)
43643              << " astype_result_dtype=" << show_dtype(*r) << "\n";
is_na_at (pd_test_5_all.cpp:35205)
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
35208}
35209
35210void bool_nullable_826495_case_5_fillna_preserves_dtype(int& local_fail) {
35211    pandas::DataFrame df;
35212    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35213    pandas_tests::check(df["X"].dtype_name() == "boolean", "case_5.pre_dtype", local_fail);
35214    auto df_filled = df.fillna(1.0);
35215    pandas_tests::check(df_filled["X"].dtype_name() == "boolean",
all (pd_test_1_all.cpp:247)
237        pandas::BooleanArray has_true({
238            std::optional<bool>(false),
239            std::optional<bool>(true)
240        });
241        any_result = has_true.any();
242        if (!any_result.has_value() || !any_result.value()) {
243            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245        }
246
247        // Test all()
248        pandas::BooleanArray all_true({
249            std::optional<bool>(true),
250            std::optional<bool>(true)
251        });
252        auto all_result = all_true.all();
253        if (!all_result.has_value() || !all_result.value()) {
254            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255            throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256        }
all_values_whole_number (pd_test_5_all.cpp:30090)
30080                !src_map_ov.empty() ? src_map_ov : src_ser_dt;
30081            bool is_int_like =
30082                (src_effective.find("int") != std::string::npos ||
30083                 src_effective.find("uint") != std::string::npos);
30084            bool comb_has_col = combined.has_column(flat);
30085            bool comb_hasnans = false, comb_allwhole = false;
30086            std::string comb_dt = "<missing>";
30087            if (comb_has_col) {
30088                const pandas::NDFrameBase& c = combined[flat];
30089                comb_hasnans = c.hasnans();
30090                comb_allwhole = c.all_values_whole_number();
30091                comb_dt = c.dtype_name();
30092            }
30093            bool would_apply = is_int_like && comb_has_col &&
30094                               !comb_hasnans && comb_allwhole;
30095            std::cout << tag << " flat=" << flat
30096                      << " src_effective=" << (src_effective.empty() ? "<empty>" : src_effective)
30097                      << " is_int_like=" << is_int_like
30098                      << " comb_dt=" << comb_dt
30099                      << " comb_hasnans=" << comb_hasnans
30100                      << " comb_allwhole=" << comb_allwhole
any (pd_test_1_all.cpp:226)
216            std::cout << "  [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217            throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218        }
219
220        std::cout << " -> tests passed" << std::endl;
221    }
222
223    void pd_test_boolean_array_reductions() {
224        std::cout << "========= BooleanArray: reductions ======================= ";
225
226        // Test any()
227        pandas::BooleanArray all_false({
228            std::optional<bool>(false),
229            std::optional<bool>(false)
230        });
231        auto any_result = all_false.any();
232        if (!any_result.has_value() || any_result.value()) {
233            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235        }
axes (pd_test_1_all.cpp:16602)
16592        // =====================================================================
16593        // Axes Tests
16594        // =====================================================================
16595
16596        void pd_test_ndframe_axes() {
16597            std::cout << "========= axes =================================================" << std::endl;
16598
16599            pandas::Series<double> s({1.0, 2.0, 3.0});
16600
16601            auto axes = s.axes();
16602
16603            bool passed = axes.size() == 1;
16604            if (!passed) {
16605                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axes count" << std::endl;
16606                throw std::runtime_error("pd_test_ndframe_axes failed: axes count");
16607            }
16608
16609            passed = axes[0]->size() == 3;
16610            if (!passed) {
16611                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axis size" << std::endl;
cat_ordered (pd_test_2_all.cpp:20373)
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383    // Test DatetimeIndex freq propagation
clear_cache (pd_test_1_all.cpp:19413)
19403            s.mean();
19404            s.min();
19405            s.max();
19406
19407            passed = s.has_cached_values() == true;
19408            if (!passed) {
19409                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not populated" << std::endl;
19410                throw std::runtime_error("pd_test_series_cache failed: cache not populated");
19411            }
19412
19413            s.clear_cache();
19414            passed = s.has_cached_values() == false;
19415            if (!passed) {
19416                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not cleared" << std::endl;
19417                throw std::runtime_error("pd_test_series_cache failed: cache not cleared");
19418            }
19419
19420            std::cout << " -> tests passed" << std::endl;
19421        }
19422
19423        void pd_test_series_string_repr() {
clone (pd_test_1_all.cpp:5776)
5767    std::cout << " -> tests passed" << std::endl;
5768}
5769
5770void pd_test_categorical_index_clone() {
5771    std::cout << "========= clone =======================================";
5772
5773    pandas::CategoricalArray arr({"p", "q", "r"});
5774    pandas::CategoricalIndex idx(arr, "original");
5775
5776    std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5777
5778    bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5779                   cloned->name() == idx.name());
5780    if (!passed) {
5781        std::cout << "  [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5782        throw std::runtime_error("pd_test_categorical_index_clone failed");
5783    }
5784
5785    std::cout << " -> tests passed" << std::endl;
5786}
dtype_name (pd_test_1_all.cpp:10104)
10094}
10095
10096void pd_test_extension_index_array_constructor() {
10097    std::cout << "========= array constructor =========================";
10098
10099    pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
10100    pandas::CategoricalIndex idx(arr, "fruits");
10101
10102    bool passed = (idx.size() == 4 && !idx.empty() &&
10103                   idx.name().has_value() && *idx.name() == "fruits" &&
10104                   idx.dtype_name() == "category");
10105    if (!passed) {
10106        std::cout << "  [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107        throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108    }
10109
10110    std::cout << " -> tests passed" << std::endl;
10111}
10112
10113void pd_test_extension_index_copy_constructor() {
10114    std::cout << "========= copy constructor =========================";
dtype_name_full (pd_test_5_all.cpp:26384)
26374    pandas::DataFrame df;
26375    df.add_column<std::string>("group", {"A", "A", "B"});
26376    df.add_column<bool>("flag", {true, false, true});
26377    // Promote the column's dtype override to the PandasPython-origin sub-type.
26378    df.set_column_dtype("flag", "object:bool");
26379
26380    // Pre-check: dtype_name strips the colon, dtype_name_full keeps it.
26381    pandas_tests::check(df["flag"].dtype_name() == "object",
26382          "b21.pre: df[flag].dtype_name()==object (got '" +
26383          df["flag"].dtype_name() + "')", local_fail);
26384    pandas_tests::check(df["flag"].dtype_name_full() == "object:bool",
26385          "b21.pre: df[flag].dtype_name_full()==object:bool (got '" +
26386          df["flag"].dtype_name_full() + "')", local_fail);
26387
26388    auto gg = df.groupby("group").get_group("A");
26389
26390    // FIX VERIFIED: Option 2 via iloc_rows + take_indices preserves the
26391    // dtype_override ("object:bool"); dtype_name() strips the colon and
26392    // returns "object".
26393    std::string gg_dt = gg["flag"].dtype_name();
26394    std::string gg_dt_full = gg["flag"].dtype_name_full();
empty (pd_test_1_all.cpp:941)
932#include "../pandas/pd_config.h"
933
934namespace dataframe_tests {
935
936namespace dataframe_tests_config {
937
938    void pd_test_config_version() {
939        std::cout << "========= df_config: version info ======================= ";
940        const char* version = pandas::DataFrameInfo::version();
941        if (version == nullptr || std::string(version).empty()) {
942            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
943            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
944        }
945        std::cout << "-> tests passed" << std::endl;
946    }
947
948    void pd_test_config_na_repr() {
949        std::cout << "========= df_config: NA representation ======================= ";
950        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
951        if (na_repr == nullptr) {
flags (pd_test_1_all.cpp:16397)
16387        // =====================================================================
16388        // Series Flags Integration Tests
16389        // =====================================================================
16390
16391        void pd_test_ndframe_series_flags() {
16392            std::cout << "========= series flags integration =============================" << std::endl;
16393
16394            pandas::Series<int> s({1, 2, 3});
16395
16396            // Test default flags
16397            bool passed = s.flags().allows_duplicate_labels == true;
16398            if (!passed) {
16399                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default allows_duplicate_labels" << std::endl;
16400                throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401            }
16402
16403            passed = s.flags().copy_on_write == false;
16404            if (!passed) {
16405                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406                throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407            }
has_cached_values (pd_test_1_all.cpp:19395)
19385            }
19386
19387            std::cout << " -> tests passed" << std::endl;
19388        }
19389
19390        void pd_test_series_cache() {
19391            std::cout << "========= cache management =========================================";
19392
19393            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
19394
19395            bool passed = s.has_cached_values() == false;
19396            if (!passed) {
19397                std::cout << "  [FAIL] : in pd_test_series_cache() : initial cache not empty" << std::endl;
19398                throw std::runtime_error("pd_test_series_cache failed: initial cache not empty");
19399            }
19400
19401            // Trigger cache
19402            s.sum();
19403            s.mean();
19404            s.min();
19405            s.max();
has_cat_categories (pd_test_2_all.cpp:20372)
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
has_mask (pd_test_3_all.cpp:27708)
27699    auto* bool_s = dynamic_cast<pandas::Series<numpy::bool_>*>(result.get());
27700    if (!bool_s) {
27701        std::cout << "    FAIL: expected Series<bool_>" << std::endl;
27702        fail++;
27703    } else {
27704        if (bool_s->dtype_name() != "boolean") {
27705            std::cout << "    FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27706            fail++;
27707        }
27708        if (!bool_s->has_mask()) {
27709            std::cout << "    FAIL: should have mask for NA" << std::endl;
27710            fail++;
27711        } else {
27712            if (!bool_s->mask_at(2)) {
27713                std::cout << "    FAIL: position 2 should be masked (NA)" << std::endl;
27714                fail++;
27715            }
27716        }
27717    }
hasnans (pd_test_1_all.cpp:5363)
5353void pd_test_categorical_index_from_codes() {
5354    std::cout << "========= from_codes =================================";
5355
5356    std::vector<numpy::int32> codes = {0, 1, 0, 2, -1};  // -1 = NA
5357    std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359    pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361    bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362                   idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363                   idx.hasnans());  // has NA from code -1
5364    if (!passed) {
5365        std::cout << "  [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366        throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367    }
5368
5369    std::cout << " -> tests passed" << std::endl;
5370}
5371
5372void pd_test_categorical_index_simple_new() {
5373    std::cout << "========= _simple_new =================================";
index (pd_test_1_all.cpp:6680)
6670        void pd_test_dataframe_index_ops() {
6671            std::cout << "========= index operations =================";
6672
6673            // Test set_axis (rows)
6674            {
6675                std::map<std::string, std::vector<int>> data;
6676                data["A"] = {1, 2, 3};
6677                pandas::DataFrame df(data);
6678
6679                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680                std::string idx0 = renamed.index().get_value_str(0);
6681                if (idx0 != "x") {
6682                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684                }
6685            }
6686
6687            // Test set_axis (columns)
6688            {
6689                std::map<std::string, std::vector<int>> data;
6690                data["A"] = {1, 2};
name (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
set_cat_categories (pd_test_2_all.cpp:20366)
20356    check(sub.columns().get_value_str(0) == "col", "dup col0 name");
20357    check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
set_cat_ordered (pd_test_2_all.cpp:20367)
20357    check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
set_dtype_override (pd_test_2_all.cpp:20225)
20215    std::vector<numpy::float64> vals = {1.0, 2.0, 3.0};
20216    df.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals, "A"), true);
20217
20218    auto t = df.classify_column_access("A");
20219    check(t == pandas::DataFrame::ColumnAccessType::NumericColumn, "float64 -> NumericColumn");
20220
20221    // int64 column
20222    pandas::DataFrame df2;
20223    std::vector<numpy::int64> ivals = {10, 20, 30};
20224    auto iseries = std::make_unique<pandas::Series<numpy::int64>>(ivals, "B");
20225    iseries->set_dtype_override("int64");
20226    df2.insert(0, "B", std::move(iseries), true);
20227    auto t2 = df2.classify_column_access("B");
20228    check(t2 == pandas::DataFrame::ColumnAccessType::NumericColumn, "int64 -> NumericColumn");
20229}
20230
20231void pd_test_getitem_dispatch_classify_bool() {
20232    std::cout << "pd_test_getitem_dispatch_classify_bool" << std::endl;
20233    pandas::DataFrame df;
20234    std::vector<numpy::bool_> bvals = {true, false, true};
20235    df.insert(0, "flag", std::make_unique<pandas::Series<numpy::bool_>>(bvals, "flag"), true);
set_flags (pd_test_1_all.cpp:16410)
16400                throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401            }
16402
16403            passed = s.flags().copy_on_write == false;
16404            if (!passed) {
16405                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406                throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407            }
16408
16409            // Test set_flags
16410            s.set_flags(pandas::Flags(false, true));
16411            passed = s.flags().allows_duplicate_labels == false;
16412            if (!passed) {
16413                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : set allows_duplicate_labels" << std::endl;
16414                throw std::runtime_error("pd_test_ndframe_series_flags failed: set allows_duplicate_labels");
16415            }
16416
16417            passed = s.flags().copy_on_write == true;
16418            if (!passed) {
16419                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : set copy_on_write" << std::endl;
16420                throw std::runtime_error("pd_test_ndframe_series_flags failed: set copy_on_write");
set_name (pd_test_1_all.cpp:11798)
11788                throw std::runtime_error("pd_test_index_vector_constructor failed");
11789            }
11790
11791            std::cout << " -> tests passed" << std::endl;
11792        }
11793
11794        void pd_test_index_copy_constructor() {
11795            std::cout << "========= copy constructor ============================";
11796
11797            pandas::Index<numpy::int64> idx1{1, 2, 3};
11798            idx1.set_name("original");
11799
11800            pandas::Index<numpy::int64> idx2(idx1);
11801
11802            bool passed = (idx2.size() == 3);
11803            passed = passed && (idx2.name().value() == "original");
11804            passed = passed && idx2.equals(idx1);
11805
11806            if (!passed) {
11807                std::cout << "  [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808                throw std::runtime_error("pd_test_index_copy_constructor failed");
set_string_na_sentinel_disabled (pd_test_5_all.cpp:40315)
40305        pandas_tests::check(false, "case3.mask_pos2_false", local_fail);
40306    }
40307
40308    std::cout << "  source has_mask=" << s.has_mask()
40309              << " result has_mask=" << r.has_mask() << "\n";
40310}
40311
40312void case_4_frame_flags_propagate(int& local_fail) {
40313    std::cout << "----- case_4_frame_flags_propagate -----\n";
40314    auto s = make_series_3<std::string>({"x", "y", "z"});
40315    s.set_string_na_sentinel_disabled(true);
40316
40317    auto r = s.reindex({"0", "1", "2"});
40318
40319    pandas_tests::check(r.string_na_sentinel_disabled() == true,
40320                        "case4.string_na_sentinel_disabled_propagates", local_fail);
40321
40322    std::cout << "  source flag=" << s.string_na_sentinel_disabled()
40323              << " result flag=" << r.string_na_sentinel_disabled() << "\n";
40324}
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
string_na_sentinel_disabled (pd_test_5_all.cpp:40319)
40309              << " result has_mask=" << r.has_mask() << "\n";
40310}
40311
40312void case_4_frame_flags_propagate(int& local_fail) {
40313    std::cout << "----- case_4_frame_flags_propagate -----\n";
40314    auto s = make_series_3<std::string>({"x", "y", "z"});
40315    s.set_string_na_sentinel_disabled(true);
40316
40317    auto r = s.reindex({"0", "1", "2"});
40318
40319    pandas_tests::check(r.string_na_sentinel_disabled() == true,
40320                        "case4.string_na_sentinel_disabled_propagates", local_fail);
40321
40322    std::cout << "  source flag=" << s.string_na_sentinel_disabled()
40323              << " result flag=" << r.string_na_sentinel_disabled() << "\n";
40324}
40325
40326void case_5_index_name_propagates(int& local_fail) {
40327    std::cout << "----- case_5_index_name_propagates -----\n";
40328    auto s = make_series_3<std::int64_t>({10, 20, 30});
40329    s.index_mut().set_name(std::optional<std::string>("idx_name"));