DataFrame

class pandas::DataFrame

Core data container class in the pandas namespace.

Example

#include <pandas/pandas.h>
using namespace pandas;

// Create DataFrame
DataFrame df;
df["A"] = {1, 2, 3};
df["B"] = {4.0, 5.0, 6.0};

// Access data
auto shape = df.shape();
auto columns = df.columns();

Constructors

Signature

Location

Example

DataFrame(const std::map<std::string, std::vector<T>>& data, std::unique_ptr<IndexBase> index = nullptr, const std::string& dtype = "", bool copy = false)

pd_dataframe.h:761

View

DataFrame(const std::vector<Series<T>>& columns, const std::vector<std::string>& column_names, std::unique_ptr<IndexBase> index = nullptr, const std::string& dtype = "", bool copy = false)

pd_dataframe.h:819

View

DataFrame(const numpy::NDArray<T>& data, std::unique_ptr<IndexBase> index = nullptr, const std::vector<std::string>& columns = {}, const std::string& dtype = "", bool copy = false)

pd_dataframe.h:884

View

DataFrame(const DataFrame& other)

pd_dataframe.h:939

View

DataFrame(DataFrame&& other) noexcept

pd_dataframe.h:964

View

DataFrame(const std::map<std::string, std::vector<std::complex<double>>>& cols)

pd_dataframe.h:1078

View

DataFrame(const std::map<std::string, ColumnSource>& cols)

pd_dataframe.h:1123

View

Construction

Signature

Return Type

Location

Example

std::unique_ptr<NDFrameBase> create_nan_filled(size_t n) const override

std::unique_ptr<NDFrameBase>

pd_dataframe.h:27133

Indexing / Selection

Signature

Return Type

Location

Example

T at(const std::string& row_label, const std::string& col_label) const

T

pd_dataframe.h:3997

View

DataFrame at_time(const std::string& time, bool asof = false, int axis = 0) const

DataFrame

pd_dataframe.h:26966

View

const Attrs& attrs() const override

const Attrs&

pd_dataframe.h:1825

View

Attrs& attrs() override

Attrs&

pd_dataframe.h:1832

View

DataFrame first(const std::string& offset) const

DataFrame

pd_dataframe.h:27022

View

std::optional<std::string> first_valid_index() const

std::optional<std::string>

pd_dataframe.h:13168

View

std::optional<ReturnType> get(const std::string& key, std::optional<ReturnType> default_value = std::nullopt) const

std::optional<ReturnType>

pd_dataframe.h:13303

View

Series<numpy::bool_> get_column_as_bool_series(const std::string& key) const

Series<numpy::bool_>

pd_dataframe.h:3379

Series<numpy::int64> get_column_as_int64_series(const std::string& key) const

Series<numpy::int64>

pd_dataframe.h:3458

Series<T> get_column_as_series(size_t c, const MultiIndexColumnLabel* label = nullptr) const

Series<T>

pd_dataframe.h:3149

View

Series<std::string> get_column_as_string_series(const std::string& key) const

Series<std::string>

pd_dataframe.h:3422

View

const Series<numpy::uint64>* get_column_as_uint64_raw(const std::string& key) const

const Series<numpy::uint64>*

pd_dataframe.h:3510

Series<numpy::float64> get_column_as_uint64_series(const std::string& key) const

Series<numpy::float64>

pd_dataframe.h:3489

size_t get_column_index(const std::string& name) const

size_t

pd_dataframe.h:3282

View

static std::pair<double, double> get_column_range(const NDFrameBase* col)

std::pair<double, double>

pd_dataframe.h:22474

std::vector<double> get_column_values_as_double_(size_t col_idx) const

std::vector<double>

pd_dataframe.h:23099

const std::vector<std::vector<std::string>>& get_columns_level_categories() const

const std::vector<std::vector<std::string>>&

pd_dataframe.h:1537

const std::vector<bool>& get_columns_level_ordered() const

const std::vector<bool>&

pd_dataframe.h:1544

DataFrame get_dummies(const std::vector<std::string>& columns_to_encode = {}, const std::string& prefix_str = "", const std::vector<std::string>& prefix_list = {}, const std::map<std::string, std::string>& prefix_dict = {}, bool prefix_is_list = false, bool prefix_is_dict = false, const std::string& prefix_sep = "_", bool dummy_na = false, bool drop_first = false, const std::string& dtype_str = "") const

DataFrame

pd_dataframe.h:7226

View

DataFrame get_duplicate_columns(const std::string& key) const

DataFrame

pd_dataframe.h:3347

View

DataFrame get_multiindex_column_group(const std::string& key) const

DataFrame

pd_dataframe.h:3562

size_t get_num_levels(int axis) const

size_t

pd_dataframe.h:15914

std::optional<const NDFrameBase*> get_optional(const std::string& key) const

std::optional<const NDFrameBase*>

pd_dataframe.h:6753

View

Series<T> get_series(const std::string& col_name) const

Series<T>

pd_dataframe.h:3630

View

bool get_value_bool(size_t idx) const override

bool

pd_dataframe.h:8435

View

DataFrame head(size_t n = 5) const

DataFrame

pd_dataframe.h:4016

View

T iat(size_t row, size_t col_idx) const

T

pd_dataframe.h:4005

View

Series<std::string> idxmax(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<std::string>

pd_dataframe.h:10501

View

Series<std::string> idxmin(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<std::string>

pd_dataframe.h:10587

View

DataFrame iloc(size_t start, size_t stop) const

DataFrame

pd_dataframe.h:3794

View

T iloc(size_t row, size_t col_idx) const

T

pd_dataframe.h:3802

View

DataFrame iloc(const std::vector<size_t>& row_indices, const std::vector<size_t>& col_indices) const

DataFrame

pd_dataframe.h:3983

View

DataFrame iloc_cols(const std::vector<size_t>& col_indices) const

DataFrame

pd_dataframe.h:3969

DataFrame iloc_row(size_t row) const

DataFrame

pd_dataframe.h:3820

DataFrame iloc_rows(size_t start, size_t stop, size_t step = 1) const

DataFrame

pd_dataframe.h:3830

View

DataFrame iloc_rows(const std::vector<size_t>& row_indices) const

DataFrame

pd_dataframe.h:3865

View

DataFrame last(const std::string& offset) const

DataFrame

pd_dataframe.h:27029

View

std::optional<std::string> last_valid_index() const

std::optional<std::string>

pd_dataframe.h:13188

View

T loc(const std::string& row_label, const std::string& col_label) const

T

pd_dataframe.h:3709

View

DataFrame loc(const std::string& start_label, const std::string& end_label) const

DataFrame

pd_dataframe.h:3718

View

LocProxy loc(const std::string& label1, const std::string& label2)

LocProxy

pd_dataframe.h:3730

View

DataFrame loc(const std::string& first_level_label) const

DataFrame

pd_dataframe.h:3737

View

DataFrame loc(const std::vector<std::string>& row_labels, const std::vector<std::string>& col_labels) const

DataFrame

pd_dataframe.h:3781

View

DataFrame loc_row(const std::string& row_label) const

DataFrame

pd_dataframe.h:3759

DataFrame loc_rows(const std::vector<std::string>& row_labels) const

DataFrame

pd_dataframe.h:3767

DataFrame mask(const DataFrame& cond, double other = std::numeric_limits<double>::quiet_NaN(), bool inplace = false, int axis = 0, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:9354

View

DataFrame nlargest(size_t n, const std::string& columns, const std::string& keep = "first") const

DataFrame

pd_dataframe.h:15038

View

DataFrame nsmallest(size_t n, const std::string& columns, const std::string& keep = "first") const

DataFrame

pd_dataframe.h:15051

View

DataFrame query(const std::string& expr, bool inplace = false) const

DataFrame

pd_dataframe.h:9486

View

std::string query_get_value_str(const std::string& name, size_t row) const

std::string

pd_dataframe.h:9539

static double query_str_to_double(const std::string& s)

double

pd_dataframe.h:9617

DataFrame sample(std::optional<size_t> n = std::nullopt, std::optional<double> frac = std::nullopt, bool replace = false, const std::vector<double>& weights = {}, unsigned int random_state = 0, bool ignore_index = false, std::optional<int> axis = std::nullopt) const

DataFrame

pd_dataframe.h:4040

View

DataFrame sample(T n, unsigned int seed) const

DataFrame

pd_dataframe.h:4141

View

DataFrame sample(T n, unsigned int seed) const

DataFrame

pd_dataframe.h:4155

View

DataFrame sample(std::optional<size_t> n, std::optional<double> frac, bool replace, unsigned int random_state) const

DataFrame

pd_dataframe.h:4176

View

DataFrame sample(std::optional<size_t> n, std::optional<double> frac, bool replace, unsigned int random_state, bool ignore_index) const

DataFrame

pd_dataframe.h:4187

View

DataFrame sample_frac(double frac, unsigned int seed = 0) const

DataFrame

pd_dataframe.h:4168

View

void set_value_double(size_t idx, double value) override

void

pd_dataframe.h:8465

void set_value_nan(size_t idx) override

void

pd_dataframe.h:8449

View

DataFrame tail(size_t n = 5) const

DataFrame

pd_dataframe.h:4024

View

DataFrame take(const std::vector<size_t>& indices, int axis = 0) const

DataFrame

pd_dataframe.h:4201

View

std::unique_ptr<NDFrameBase> take_indices(const std::vector<size_t>& indices) const override

std::unique_ptr<NDFrameBase>

pd_dataframe.h:27152

DataFrame where(const DataFrame& cond, const Series<numpy::float64>& other, int axis) const

DataFrame

pd_dataframe.h:8953

View

DataFrame where(const DataFrame& cond, const DataFrame& other) const

DataFrame

pd_dataframe.h:8993

View

DataFrame where(const DataFrame& cond, double other = std::numeric_limits<double>::quiet_NaN(), bool inplace = false, int axis = 0, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:9130

View

DataFrame where_resolved(const WhereCondition& cond, const WhereOther& other, int axis) const

DataFrame

pd_dataframe.h:9271

View

DataFrame xs(const std::string& key, int axis = 0, std::optional<int> level = std::nullopt, bool drop_level = true) const

DataFrame

pd_dataframe.h:4274

View

DataFrame xs(const std::string& key, const std::string& axis, std::optional<int> level = std::nullopt, bool drop_level = true) const

DataFrame

pd_dataframe.h:4315

View

DataFrame xs(const std::vector<std::string>& keys, int axis = 0, const std::vector<size_t>& levels = {}, bool drop_level = true) const

DataFrame

pd_dataframe.h:4347

View

DataFrame xs_col(const std::string& key, bool drop_level) const

DataFrame

pd_dataframe.h:4444

DataFrame xs_cols_multi(const std::vector<std::string>& keys, const std::vector<size_t>& levels, bool drop_level) const

DataFrame

pd_dataframe.h:4550

DataFrame xs_level(const std::string& key, const std::string& level_name, bool drop_level = true) const

DataFrame

pd_dataframe.h:4367

View

DataFrame xs_row(const std::string& key, bool drop_level) const

DataFrame

pd_dataframe.h:4392

DataFrame xs_rows_multi(const std::vector<std::string>& keys, const std::vector<size_t>& levels, bool drop_level) const

DataFrame

pd_dataframe.h:4492

Data Manipulation

Signature

Return Type

Location

Example

DataFrame assign(const std::string& col_name, const std::vector<T>& data) const

DataFrame

pd_dataframe.h:10259

View

DataFrame assign(const std::string& col_name, const Series<numpy::float64>& s) const

DataFrame

pd_dataframe.h:10300

View

DataFrame assign(const std::string& col_name, const Series<std::string>& s) const

DataFrame

pd_dataframe.h:10322

View

DataFrame drop(const std::vector<std::string>& labels, int axis = 0, const std::vector<std::string>& index = {}, const std::vector<std::string>& columns = {}, std::optional<int> level = std::nullopt, bool inplace = false, const std::string& errors = "raise") const

DataFrame

pd_dataframe.h:7177

View

DataFrame drop_duplicates(const std::vector<std::string>& subset = {}, const std::string& keep = "first", bool inplace = false, bool ignore_index = false) const

DataFrame

pd_dataframe.h:10113

View

void drop_inplace(const std::vector<std::string>& labels, int axis = 0)

void

pd_dataframe.h:7243

static std::string drop_level_from_label(const std::string& label, size_t level)

std::string

pd_dataframe.h:15555

static void drop_values_column_level(DataFrame& result, const std::vector<std::string>& columns)

void

pd_dataframe.h:16405

drop_values_column_level(result, columns)

pd_dataframe.h:16470

drop_values_column_level(result, columns)

pd_dataframe.h:16957

drop_values_column_level(result, std::vector<std::string>{columns})

pd_dataframe.h:17001

DataFrame droplevel(int level, int axis = 0) const

DataFrame

pd_dataframe.h:15296

View

DataFrame droplevel(int level, const std::string& axis) const

DataFrame

pd_dataframe.h:15313

View

DataFrame droplevel(const std::string& level_name, int axis = 0) const

DataFrame

pd_dataframe.h:15335

View

DataFrame droplevel(const std::vector<int>& levels, int axis = 0) const

DataFrame

pd_dataframe.h:15365

View

DataFrame droplevel_columns(int level) const

DataFrame

pd_dataframe.h:15499

DataFrame droplevel_index(int level) const

DataFrame

pd_dataframe.h:15388

DataFrame dropna(const std::string& how, int axis = 0, const std::vector<std::string>& subset = {}, std::optional<int> thresh = std::nullopt, bool inplace = false, bool ignore_index = false) const

DataFrame

pd_dataframe.h:7518

View

DataFrame dropna(int axis = 0, const std::string& how = "any", std::optional<int> thresh = std::nullopt, const std::vector<std::string>& subset = {}, bool inplace = false, bool ignore_index = false) const

DataFrame

pd_dataframe.h:7525

View

void insert(size_t loc, const std::string& column, std::unique_ptr<NDFrameBase> value, bool allow_duplicates = false)

void

pd_dataframe.h:6806

View

insert(ncols(), name, std::move(s))

pd_dataframe.h:6874

View

insert(ncols(), name, std::make_unique<Series<T>>(data, name))

pd_dataframe.h:6877

View

insert(ncols(), name, std::move(new_col))

pd_dataframe.h:6906

View

insert(ncols(), name, std::move(new_col))

pd_dataframe.h:6942

View

insert(ncols(), name, std::move(new_col))

pd_dataframe.h:6965

View

insert(ncols(), name, std::move(s))

pd_dataframe.h:7006

View

insert(ncols(), name, std::move(s))

pd_dataframe.h:7026

View

insert(ncols(), name, std::move(s))

pd_dataframe.h:7039

View

DataFrame reindex(const std::vector<std::string>& labels = {}, const std::vector<std::string>& index = {}, const std::vector<std::string>& columns = {}, int axis = 0, const std::string& method = "", bool copy = true, std::optional<int> level = std::nullopt, double fill_value = std::nan(""), std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt, const std::optional<std::string>& str_fill = std::nullopt) const

DataFrame

pd_dataframe.h:5422

View

DataFrame reindex(const std::vector<std::string>& labels, int axis, double fill_value = std::nan(""), const std::optional<std::string>& str_fill = std::nullopt) const

DataFrame

pd_dataframe.h:6475

View

DataFrame reindex(const std::vector<std::string>& new_labels, pandas::detail::FillKind fill_kind, double numeric_fill = std::numeric_limits<double>::quiet_NaN(), const std::string& string_fill = "") const

DataFrame

pd_dataframe.h:6486

View

DataFrame reindex(const std::vector<std::string>& labels, const std::vector<std::string>& index, int axis, double fill_value) const

DataFrame

pd_dataframe.h:6503

View

DataFrame reindex(const std::vector<std::int64_t>& int_labels, int axis = 0, double fill_value = std::nan("")) const

DataFrame

pd_dataframe.h:6513

View

DataFrame reindex_by_indexer(const numpy::NDArray<numpy::int64>& indexer, const Index<std::string>& new_index, double fill_value) const

DataFrame

pd_dataframe.h:27072

DataFrame reindex_impl(const std::vector<std::string>& labels, int axis, double fill_value, const std::optional<std::string>& str_fill = std::nullopt, const std::optional<FillSpec>& explicit_spec = std::nullopt, const std::string& method = "", std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

DataFrame

pd_dataframe.h:5480

DataFrame reindex_like(const DataFrame& other, const std::string& method = "", bool copy = true, std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

DataFrame

pd_dataframe.h:6769

View

std::unique_ptr<NDFrameBase> reindex_with_indexer(const numpy::NDArray<numpy::int64>& indexer) const override

std::unique_ptr<NDFrameBase>

pd_dataframe.h:27156

View

DataFrame reindex_with_spec(const std::vector<std::string>& labels, int axis, const FillSpec& spec, const std::string& method = "", std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

DataFrame

pd_dataframe.h:6531

View

DataFrame rename(const std::map<std::string, std::string>& mapper = {}, const std::map<std::string, std::string>& index = {}, const std::map<std::string, std::string>& columns = {}, int axis = 0, bool copy = true, bool inplace = false, std::optional<int> level = std::nullopt, const std::string& errors = "ignore") const

DataFrame

pd_dataframe.h:5336

View

DataFrame rename(const std::map<std::string, std::string>& mapper, int axis) const

DataFrame

pd_dataframe.h:5385

View

DataFrame rename_axis(const std::string& mapper = "", const std::string& index = "", const std::string& columns = "", int axis = 0, bool copy = true, bool inplace = false) const

DataFrame

pd_dataframe.h:6604

View

DataFrame rename_axis(const std::string& mapper, int axis) const

DataFrame

pd_dataframe.h:6642

View

DataFrame rename_columns(const std::map<std::string, std::string>& mapper) const

DataFrame

pd_dataframe.h:5278

View

Result rename_result(const RenameSpec& spec, int axis, bool inplace, bool errors_raise)

Result

pd_dataframe.h:5403

View

DataFrame reorder_levels(const std::vector<int>& order, int axis = 0) const

DataFrame

pd_dataframe.h:15657

View

DataFrame reorder_levels(const std::vector<int>& order, const std::string& axis) const

DataFrame

pd_dataframe.h:15708

View

DataFrame reorder_levels(const std::vector<std::string>& order, int axis = 0) const

DataFrame

pd_dataframe.h:15731

View

DataFrame reorder_levels(std::initializer_list<const char*> order, int axis = 0) const

DataFrame

pd_dataframe.h:15766

View

DataFrame reorder_levels_columns(const std::vector<size_t>& order) const

DataFrame

pd_dataframe.h:15986

static std::string reorder_levels_in_label(const std::string& label, const std::vector<size_t>& order)

std::string

pd_dataframe.h:15590

DataFrame reorder_levels_index(const std::vector<size_t>& order) const

DataFrame

pd_dataframe.h:15932

DataFrame replace(double to_replace, double value, bool inplace = false, std::optional<int> limit = std::nullopt, bool regex = false, const std::string& method = "") const

DataFrame

pd_dataframe.h:9027

View

DataFrame replace(const std::string& to_replace, const std::string& value, bool inplace = false, std::optional<int> limit = std::nullopt, bool regex = false, const std::string& method = "") const

DataFrame

pd_dataframe.h:9050

View

DataFrame replace(const std::map<std::string,std::string>& mapping, bool inplace = false) const

DataFrame

pd_dataframe.h:9075

View

DataFrame replace(std::int64_t to_replace, std::int64_t value) const

DataFrame

pd_dataframe.h:9095

View

DataFrame replace(int old_val, int new_val) const

DataFrame

pd_dataframe.h:9100

View

void replace_column_str(const std::string& name, const std::vector<std::string>& data)

void

pd_dataframe.h:7092

void replace_value(double to_replace, double value) override

void

pd_dataframe.h:8423

DataFrame& reset_index(bool drop = false, std::optional<int> level = std::nullopt, bool inplace = false, std::optional<int> col_level = std::nullopt, const std::string& col_fill = "", bool allow_duplicates = false, const std::optional<std::vector<std::string>>& names = std::nullopt)

DataFrame&

pd_dataframe.h:5027

View

DataFrame set_axis(const std::vector<std::string>& labels, int axis = 0, bool copy = true) const

DataFrame

pd_dataframe.h:6559

View

void set_index(std::unique_ptr<IndexBase> new_index) override

void

pd_dataframe.h:4611

View

void set_index(const std::shared_ptr<IdxT>& new_index)

void

pd_dataframe.h:4630

View

DataFrame set_index(std::initializer_list<const char*> keys, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:4640

View

DataFrame set_index(const std::vector<std::string>& keys, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:4652

View

DataFrame set_index(const std::string& key, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:4663

View

void set_index(const IdxT& idx)

void

pd_dataframe.h:4682

View

set_index(idx.clone())

pd_dataframe.h:4684

View

void set_index(const MultiIndex& mi)

void

pd_dataframe.h:4690

View

void set_index(MultiIndex&& mi)

void

pd_dataframe.h:4693

View

void set_index(std::initializer_list<std::type_identity_t<T>> values)

void

pd_dataframe.h:4713

View

set_index(std::make_unique<Index<T>>(std::move(vec)))

pd_dataframe.h:4715

View

void set_index(const std::vector<std::type_identity_t<T>>& values)

void

pd_dataframe.h:4720

View

set_index(std::make_unique<Index<T>>(values))

pd_dataframe.h:4721

View

void set_index(std::vector<std::type_identity_t<T>>&& values)

void

pd_dataframe.h:4726

View

set_index(std::make_unique<Index<T>>(std::move(values)))

pd_dataframe.h:4727

View

set_index(std::make_unique<Index<std::string>>(vec))

pd_dataframe.h:4740

View

set_index(std::make_unique<Index<numpy::int64>>(vec))

pd_dataframe.h:4745

View

set_index(std::make_unique<Index<numpy::int64>>(vec))

pd_dataframe.h:4752

View

set_index(std::make_unique<Index<numpy::int64>>(vec))

pd_dataframe.h:4762

View

set_index(std::make_unique<Index<std::string>>(values))

pd_dataframe.h:4766

View

set_index(std::make_unique<DatetimeIndex>(std::move(dti)))

pd_dataframe.h:4842

View

set_index(std::move(idx))

pd_dataframe.h:4890

View

DataFrame set_index_col(const std::string& col_name, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:4901

View

DataFrame set_index_col(const std::vector<std::string>& keys, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:4948

View

DataFrame set_index_col(std::initializer_list<const char*> keys, bool drop = true, bool append = false, bool verify_integrity = false) const

DataFrame

pd_dataframe.h:5007

View

void set_index_from_column(const std::string& name, std::initializer_list<T> values)

void

pd_dataframe.h:4884

View

void set_index_from_list(std::initializer_list<const char*> values)

void

pd_dataframe.h:4738

View

void set_index_from_list(std::initializer_list<int64_t> values)

void

pd_dataframe.h:4743

View

void set_index_from_list(std::initializer_list<int> values)

void

pd_dataframe.h:4748

View

void set_index_from_list(std::initializer_list<long> values)

void

pd_dataframe.h:4758

View

void set_index_from_list(const std::vector<std::string>& values)

void

pd_dataframe.h:4765

View

set_index_from_list(values)

pd_dataframe.h:4771

View

set_index_from_list(values)

pd_dataframe.h:4774

View

set_index_from_list(values)

pd_dataframe.h:4777

View

set_index_from_list(values)

pd_dataframe.h:4780

View

set_index_from_list(values)

pd_dataframe.h:4783

View

void set_index_from_strings(std::initializer_list<const char*> values)

void

pd_dataframe.h:4779

View

void set_index_from_strings(const std::vector<std::string>& values)

void

pd_dataframe.h:4782

View

void set_index_name(const std::string& name)

void

pd_dataframe.h:4789

View

set_index_name(name)

pd_dataframe.h:4798

View

DataFrame swaplevel(int i = -2, int j = -1, int axis = 0) const

DataFrame

pd_dataframe.h:15815

View

DataFrame swaplevel(int i, int j, const std::string& axis) const

DataFrame

pd_dataframe.h:15863

View

DataFrame swaplevel(const std::string& i, const std::string& j, int axis = 0) const

DataFrame

pd_dataframe.h:15886

View

void update(const DataFrame& other, bool overwrite = true, const std::string& filter_func = "", const std::string& join = "left", const std::string& errors = "raise")

void

pd_dataframe.h:26357

View

Missing Data

Signature

Return Type

Location

Example

DataFrame backfill(int axis = 0, std::optional<int> limit = std::nullopt, bool inplace = false, const std::string& downcast = "") const

DataFrame

pd_dataframe.h:13146

View

DataFrame bfill(int axis = 0, std::optional<int> limit = std::nullopt, bool inplace = false, const std::string& limit_area = "", const std::string& downcast = "") const

DataFrame

pd_dataframe.h:7738

View

DataFrame ffill(int axis = 0, std::optional<int> limit = std::nullopt, bool inplace = false, const std::string& limit_area = "", const std::string& downcast = "") const

DataFrame

pd_dataframe.h:7644

View

DataFrame fillna(double value, const std::string& method = "", int axis = 0, bool inplace = false, std::optional<int> limit = std::nullopt, const std::string& downcast = "") const

DataFrame

pd_dataframe.h:8771

View

DataFrame fillna(const std::string& value) const

DataFrame

pd_dataframe.h:8819

View

DataFrame fillna(const std::map<std::string, double>& values, const std::string& method = "", int axis = 0, bool inplace = false, std::optional<int> limit = std::nullopt, const std::string& downcast = "") const

DataFrame

pd_dataframe.h:8828

View

DataFrame fillna(const Series<numpy::float64>& values, const std::string& method = "", int axis = 0, bool inplace = false, std::optional<int> limit = std::nullopt, const std::string& downcast = "") const

DataFrame

pd_dataframe.h:8857

View

DataFrame fillna(const DataFrame& fill_df, int axis = 0, std::optional<int> limit = std::nullopt) const

DataFrame

pd_dataframe.h:8877

View

void fillna_double(double value) override

void

pd_dataframe.h:8413

DataFrame interpolate(const std::string& method = "linear", int axis = 0, std::optional<int> limit = std::nullopt, const std::string& limit_direction = "forward", bool inplace = false, const std::string& limit_area = "", const std::string& downcast = "") const

DataFrame

pd_dataframe.h:7835

View

interpolate_column(result, c, method, limit, limit_direction, limit_area)

pd_dataframe.h:7869

void interpolate_column(DataFrame& result, size_t col_idx, const std::string& method, std::optional<int> limit, const std::string& limit_direction, const std::string& limit_area = "") const

void

pd_dataframe.h:7886

interpolate_row(result, r, method, limit, limit_direction)

pd_dataframe.h:7874

void interpolate_row(DataFrame& result, size_t row_idx, const std::string& method, std::optional<int> limit, const std::string& limit_direction) const

void

pd_dataframe.h:8293

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_dataframe.h:8508

View

DataFrame isna_frame() const

DataFrame

pd_dataframe.h:8538

View

numpy::NDArray<numpy::bool_> isnull() const

numpy::NDArray<numpy::bool_>

pd_dataframe.h:13126

View

DataFrame isnull_frame() const

DataFrame

pd_dataframe.h:8591

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_dataframe.h:8523

View

DataFrame notna_frame() const

DataFrame

pd_dataframe.h:8565

View

numpy::NDArray<numpy::bool_> notnull() const

numpy::NDArray<numpy::bool_>

pd_dataframe.h:13134

View

DataFrame notnull_frame() const

DataFrame

pd_dataframe.h:8598

View

DataFrame pad(int axis = 0, std::optional<int> limit = std::nullopt, bool inplace = false, const std::string& downcast = "") const

DataFrame

pd_dataframe.h:13159

View

Statistics

Signature

Return Type

Location

Example

size_t count() const override

size_t

pd_dataframe.h:7458

View

Series<numpy::int64> count(int axis, bool numeric_only = false) const

Series<numpy::int64>

pd_dataframe.h:7472

View

Series<numpy::int64> count_cols() const

Series<numpy::int64>

pd_dataframe.h:10719

View

DataFrame cummax(int axis = 0, bool skipna = true) const

DataFrame

pd_dataframe.h:20426

View

DataFrame cummin(int axis = 0, bool skipna = true) const

DataFrame

pd_dataframe.h:20311

View

DataFrame cumprod(int axis = 0, bool skipna = true) const

DataFrame

pd_dataframe.h:20214

View

DataFrame cumsum(int axis = 0, bool skipna = true) const

DataFrame

pd_dataframe.h:20108

View

DataFrame describe(const std::vector<double>& percentiles = {0.25, 0.5, 0.75}) const

DataFrame

pd_dataframe.h:11001

View

DataFrame describe_full(const std::vector<double>& percentiles = {0.25, 0.5, 0.75}, int include_mode = 0) const

DataFrame

pd_dataframe.h:10868

View

Series<numpy::float64> kurt(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12411

View

Series<numpy::float64> kurt_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:11071

View

Series<numpy::float64> kurtosis(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12465

View

Series<numpy::float64> max(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12250

View

Series<numpy::float64> max_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:10459

View

Series<numpy::float64> mean(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12011

View

Series<numpy::float64> mean_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:10392

View

Series<numpy::float64> median(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:11174

View

Series<numpy::float64> median_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:11126

View

Series<numpy::float64> min(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12158

View

Series<numpy::float64> min_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:10430

View

DataFrame mode(int axis = 0, bool numeric_only = false, bool dropna = true) const

DataFrame

pd_dataframe.h:11242

View

DataFrame mode_cols(bool numeric_only = false, bool dropna = true) const

DataFrame

pd_dataframe.h:11261

DataFrame mode_rows(bool numeric_only = false, bool dropna = true) const

DataFrame

pd_dataframe.h:11406

Series<numpy::int64> nunique(int axis = 0, bool dropna = true) const

Series<numpy::int64>

pd_dataframe.h:13667

View

Series<numpy::int64> nunique_cols(bool dropna = true) const

Series<numpy::int64>

pd_dataframe.h:13622

View

Series<numpy::float64> prod(int axis = 0, bool skipna = true, bool numeric_only = false, int min_count = 0) const

Series<numpy::float64>

pd_dataframe.h:11554

View

Series<numpy::float64> prod_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:11507

View

Series<numpy::float64> product(int axis = 0, bool skipna = true, bool numeric_only = false, int min_count = 0) const

Series<numpy::float64>

pd_dataframe.h:12592

View

Series<numpy::float64> quantile(double q = 0.5, int axis = 0, bool numeric_only = false, const std::string& interpolation = "linear", const std::string& method = "") const

Series<numpy::float64>

pd_dataframe.h:12478

View

Series<numpy::float64> quantile_cols(double q, const std::string& interpolation = "linear") const

Series<numpy::float64>

pd_dataframe.h:13949

View

Series<numpy::float64> sem(int axis = 0, bool skipna = true, int ddof = 1, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12312

View

Series<numpy::float64> sem_cols(bool skipna = true, int ddof = 1) const

Series<numpy::float64>

pd_dataframe.h:11008

Series<numpy::float64> skew(int axis = 0, bool skipna = true, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12358

View

Series<numpy::float64> skew_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:11024

View

Series<numpy::float64> std(int axis = 0, bool skipna = true, int ddof = 1, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12065

View

pandas::Series<numpy::timedelta64> std_(td_vals)

pandas::Series<numpy::timedelta64>

pd_dataframe.h:21232

View

pandas::Series<numpy::timedelta64> std_(td_vals)

pandas::Series<numpy::timedelta64>

pd_dataframe.h:21597

View

Series<numpy::float64> std_cols(bool skipna = true, int ddof = 1) const

Series<numpy::float64>

pd_dataframe.h:10660

View

Series<numpy::float64> sum(int axis = 0, bool skipna = true, bool numeric_only = false, int min_count = 0) const

Series<numpy::float64>

pd_dataframe.h:11822

View

Series<numpy::float64> sum_cols(bool skipna = true) const

Series<numpy::float64>

pd_dataframe.h:10341

View

std::variant<Series<numpy::int64>, Series<numpy::float64>> value_counts( const std::vector<std::string>& subset = {}, bool normalize = false, bool sort = true, bool ascending = false, bool dropna = true) const

std::variant<Series<numpy::int64>, Series<numpy::float64>>

pd_dataframe.h:13754

View

Series<numpy::float64> var(int axis = 0, bool skipna = true, int ddof = 1, bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:12118

View

Series<numpy::float64> var_cols(bool skipna = true, int ddof = 1) const

Series<numpy::float64>

pd_dataframe.h:10703

Aggregation#

Signature

Return Type

Location

Example

DataFrame agg(const std::string& func, int axis = 0) const

DataFrame

pd_dataframe.h:23703

View

DataFrame agg(const std::vector<std::string>& funcs, int axis = 0) const

DataFrame

pd_dataframe.h:23742

View

DataFrame agg(const std::map<std::string, std::string>& col_func_map, int axis = 0) const

DataFrame

pd_dataframe.h:23795

View

DataFrame agg(const std::map<std::string, std::vector<std::string>>& col_funcs_map, int axis = 0) const

DataFrame

pd_dataframe.h:23820

View

Series<numpy::float64> agg_to_series(const std::map<std::string, std::string>& col_func_map, int axis = 0) const

Series<numpy::float64>

pd_dataframe.h:23874

View

Series<numpy::float64> agg_to_series(const std::string& func, int axis = 0) const

Series<numpy::float64>

pd_dataframe.h:23893

View

DataFrame aggregate(const std::string& func, int axis = 0) const

DataFrame

pd_dataframe.h:23776

View

DataFrame aggregate(const std::vector<std::string>& funcs, int axis = 0) const

DataFrame

pd_dataframe.h:23785

View

DataFrame aggregate(const std::map<std::string, std::string>& col_func_map, int axis = 0) const

DataFrame

pd_dataframe.h:23861

View

DataFrame aggregate(const std::map<std::string, std::vector<std::string>>& col_funcs_map, int axis = 0) const

DataFrame

pd_dataframe.h:23864

View

DataFrame apply(Func&& func, int axis = 0) const

DataFrame

pd_dataframe.h:20839

View

DataFrame apply(Func&& func, int axis = 0, bool raw = false, const std::string& result_type = "", const std::string& by_row = "compat", const std::string& engine = "", const std::map<std::string, std::string>& engine_kwargs = {}) const

DataFrame

pd_dataframe.h:20879

View

PANDASCORE_API Result apply( const FuncArg& func, int axis = 0, const std::function<ApplyCellResult(const ApplyRowInput&)>& row_cb = {}, const std::function<ApplyCellResult(const ApplyColInput&)>& col_cb = {} ) const

PANDASCORE_API Result

pd_dataframe.h:21738

View

static std::string apply_bare_double_result_dtype_(const std::string& rolled)

static std::string

pd_dataframe.h:20709

PANDASCORE_API ApplyResult apply_callable( int axis, const std::function<ApplyCellResult(const ApplyRowInput&)>& row_cb, const std::function<ApplyCellResult(const ApplyColInput&)>& col_cb, const ApplyCallableOptions& opts = ApplyCallableOptions{} ) const

PANDASCORE_API ApplyResult

pd_dataframe.h:21659

View

apply_category_meta(result, col, spec)

pd_dataframe.h:22263

apply_category_meta(result, col, sub)

pd_dataframe.h:22317

std::vector<std::complex<double>> apply_column_as_complex_(size_t c) const

std::vector<std::complex<double>>

pd_dataframe.h:20773

std::vector<double> apply_column_as_doubles_(size_t c) const

std::vector<double>

pd_dataframe.h:20744

DataFrame apply_downcast(const std::string& downcast) const

DataFrame

pd_dataframe.h:22510

static std::string apply_family_target_dtype_(const std::string& src_dtype)

static std::string

pd_dataframe.h:20657

apply_family_target_dtype_(columns_[c]->dtype_name())

pd_dataframe.h:21029

std::vector<double> apply_named_transform_(const std::string& func_name, const std::vector<double>& values) const

std::vector<double>

pd_dataframe.h:23117

PANDASCORE_API DataFrame apply_resolved_typed( const std::function<ApplyCellResult(const pandas::Series<numpy::float64>&)>& cb, pandas::ApplyResultInference::ScalarKindHistogram& hist, int axis = 0 ) const

PANDASCORE_API DataFrame

pd_dataframe.h:21701

View

DataFrame apply_with_args(Func&& func, int axis, bool raw, const std::string& result_type, std::tuple<Args...> args, const std::string& by_row = "compat") const

DataFrame

pd_dataframe.h:21746

View

DataFrame applymap(Func&& func, const std::string& na_action = "") const

DataFrame

pd_dataframe.h:21859

View

DataFrameEWM ewm(std::optional<double> com = std::nullopt, std::optional<double> span = std::nullopt, std::optional<double> halflife = std::nullopt, std::optional<double> alpha = std::nullopt, int min_periods = 0, bool adjust = true, bool ignore_na = false, int axis = 0, const std::vector<double>& times = {}, const std::string& method = "single") const

DataFrameEWM

pd_dataframe.h:13081

View

DataFrameExpanding expanding(size_t min_periods = 1, int axis = 0, const std::string& method = "single") const

DataFrameExpanding

pd_dataframe.h:13055

View

DataFrameGroupBy groupby( std::initializer_list<const char*> by_init, int axis = 0, std::optional<int> level = std::nullopt, bool as_index = true, bool sort = true, bool group_keys = true, bool observed = true, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26598

View

DataFrameGroupBy groupby(const std::string& by, int axis = 0, std::optional<int> level = std::nullopt, bool as_index = true, bool sort = true, bool group_keys = true, bool observed = true, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26608

View

DataFrameGroupBy groupby(const std::vector<std::string>& by, int axis = 0, std::optional<int> level = std::nullopt, bool as_index = true, bool sort = true, bool group_keys = true, bool observed = true, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26629

View

DataFrameGroupBy groupby( const Grouper& by, int axis = 0, bool as_index = true, bool sort = true, bool group_keys = true, std::optional<bool> observed = std::nullopt, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26652

View

DataFrameGroupBy groupby( const std::vector<Grouper>& by, int axis = 0, bool as_index = true, bool sort = true, bool group_keys = true, std::optional<bool> observed = std::nullopt, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26672

View

DataFrameGroupBy groupby( std::initializer_list<std::variant<Grouper, std::string>> by_init, int axis = 0, bool as_index = true, bool sort = true, bool group_keys = true, std::optional<bool> observed = std::nullopt, bool dropna = true) const

DataFrameGroupBy

pd_dataframe.h:26685

View

DataFrame map(Func&& func, const std::string& na_action = "") const

DataFrame

pd_dataframe.h:21888

View

PANDASCORE_API DataFrame map_callable( const std::function<ApplyCellResult(const MapCellInput&)>& cell_cb, const std::string& na_action = "" ) const

PANDASCORE_API DataFrame

pd_dataframe.h:21666

PANDASCORE_API DataFrame map_callable_resolved( const std::function<ApplyCellResult(const pandas::MapCellInput&)>& cb, pandas::ApplyResultInference::ScalarKindHistogram& hist ) const

PANDASCORE_API DataFrame

pd_dataframe.h:21707

View

auto pipe(Func&& func, Args&&... args) const

auto

pd_dataframe.h:23921

View

auto pipe(Func&& func, Args&&... args)

auto

pd_dataframe.h:23930

View

DataFrameResampler resample(const std::string& rule, int axis = 0, const std::string& closed = "", const std::string& label = "", const std::string& convention = "start", const std::string& kind = "", const std::string& on = "", std::optional<int> level = std::nullopt, const std::string& origin = "start_day", const std::string& offset = "", bool group_keys = false) const

DataFrameResampler

pd_dataframe.h:26725

View

DataFrameRolling rolling(size_t window, size_t min_periods = 1, bool center = false, const std::string& win_type = "", const std::string& on = "", int axis = 0, const std::string& closed = "right", std::optional<size_t> step = std::nullopt, const std::string& method = "single") const

DataFrameRolling

pd_dataframe.h:13032

View

DataFrame transform(Func&& func, int axis = 0) const

DataFrame

pd_dataframe.h:23357

View

DataFrame transform(const std::string& func_name, int axis = 0) const

DataFrame

pd_dataframe.h:23455

View

DataFrame transform(const std::vector<std::string>& func_names, int axis = 0) const

DataFrame

pd_dataframe.h:23525

View

DataFrame transform(const std::map<std::string, std::string>& col_func_map, int axis = 0) const

DataFrame

pd_dataframe.h:23565

View

DataFrame transform(const std::map<std::string, std::vector<std::string>>& col_funcs_map, int axis = 0) const

DataFrame

pd_dataframe.h:23614

View

PANDASCORE_API DataFrame transform_callable_resolved( const std::function<ApplyCellResult(const pandas::Series<numpy::float64>&)>& cb, pandas::ApplyResultInference::ScalarKindHistogram& hist, int axis = 0 ) const

PANDASCORE_API DataFrame

pd_dataframe.h:21712

View

Arithmetic#

Signature

Return Type

Location

Example

DataFrame add(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 0.0) const

DataFrame

pd_dataframe.h:19521

View

DataFrame add(double scalar) const

DataFrame

pd_dataframe.h:19529

View

DataFrame add(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19533

View

void add_column(const std::string& name, const std::vector<T>& data)

void

pd_dataframe.h:6854

View

void add_column_nullable(const std::string& name, std::initializer_list<std::conditional_t< std::is_same_v<T, std::string>, NullableString, Nullable<T>>> data)

void

pd_dataframe.h:6984

View

void add_column_with_dtype_override(const std::string& name, const std::vector<T>& data, const std::string& dtype_str)

void

pd_dataframe.h:7034

View

DataFrame add_prefix(const std::string& prefix, int axis = -1) const

DataFrame

pd_dataframe.h:6660

View

DataFrame add_suffix(const std::string& suffix, int axis = -1) const

DataFrame

pd_dataframe.h:6711

View

DataFrame div(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19623

View

DataFrame div(double scalar) const

DataFrame

pd_dataframe.h:19631

View

DataFrame div(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19635

View

DataFrame divide(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19642

View

DataFrame divide(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19648

View

DataFrame dot(const DataFrame& other) const

DataFrame

pd_dataframe.h:19264

View

Series<double> dot(const Series<T>& other) const

Series<double>

pd_dataframe.h:19323

View

DataFrame floordiv(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19678

View

DataFrame floordiv(double scalar) const

DataFrame

pd_dataframe.h:19687

View

DataFrame mod(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19698

View

DataFrame mod(double scalar) const

DataFrame

pd_dataframe.h:19707

View

DataFrame mul(const numpy::NDArray<numpy::float64>& rhs) const

DataFrame

pd_dataframe.h:3239

View

DataFrame mul(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19585

View

DataFrame mul(double scalar) const

DataFrame

pd_dataframe.h:19593

View

DataFrame mul(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19597

View

MultiIndex multi_idx(std::move(levels), std::move(codes), level_names)

MultiIndex

pd_dataframe.h:13932

const MultiIndex& multiindex() const

const MultiIndex&

pd_dataframe.h:1351

View

DataFrame multiply(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19604

View

DataFrame multiply(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19610

View

DataFrame pow(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19718

View

DataFrame pow(double exponent) const

DataFrame

pd_dataframe.h:19727

View

DataFrame radd(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 0.0) const

DataFrame

pd_dataframe.h:19747

View

DataFrame radd(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19756

View

DataFrame radd(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19765

View

DataFrame rdiv(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19855

View

DataFrame rdiv(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19863

View

DataFrame rdiv(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19871

View

DataFrame rfloordiv(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19907

View

DataFrame rfloordiv(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19915

View

DataFrame rmod(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19935

View

DataFrame rmod(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19943

View

DataFrame rmul(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19819

View

DataFrame rmul(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19828

View

DataFrame rmul(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19836

View

DataFrame rpow(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19963

View

DataFrame rpow(double base, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19971

View

DataFrame rsub(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 0.0) const

DataFrame

pd_dataframe.h:19784

View

DataFrame rsub(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19792

View

DataFrame rsub(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19800

View

DataFrame rtruediv(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19885

View

DataFrame rtruediv(double scalar, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:19891

View

DataFrame sub(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 0.0) const

DataFrame

pd_dataframe.h:19547

View

DataFrame sub(double scalar) const

DataFrame

pd_dataframe.h:19555

View

DataFrame sub(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19559

View

DataFrame subtract(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 0.0) const

DataFrame

pd_dataframe.h:19566

View

DataFrame subtract(const Series<numpy::float64>& other, int axis = 1, std::optional<double> fill_value = std::nullopt, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19572

View

DataFrame truediv(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt, double fill_value = 1.0) const

DataFrame

pd_dataframe.h:19661

View

DataFrame truediv(double scalar) const

DataFrame

pd_dataframe.h:19667

View

Comparison#

Signature

Return Type

Location

Example

DataFrame compare( const DataFrame& other, bool keep_shape = false, bool keep_equal = false) const

DataFrame

pd_dataframe.h:26425

View

DataFrame eq(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19373

View

DataFrame eq(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19446

View

DataFrame eq(double scalar) const

DataFrame

pd_dataframe.h:19466

View

bool equals(const DataFrame& other) const

bool

pd_dataframe.h:10199

View

DataFrame ge(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19438

View

DataFrame ge(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19461

View

DataFrame ge(double scalar) const

DataFrame

pd_dataframe.h:19486

View

std::mt19937 gen(random_state == 0 ? std::random_device{}() : random_state)

std::mt19937

pd_dataframe.h:4063

View

DataFrame gt(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19425

View

DataFrame gt(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19458

View

DataFrame gt(double scalar) const

DataFrame

pd_dataframe.h:19482

View

DataFrame le(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19412

View

DataFrame le(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19455

View

DataFrame le(double scalar) const

DataFrame

pd_dataframe.h:19478

View

std::vector<std::vector<std::string>> level_arrays(keys.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:4967

std::vector<std::vector<std::string>> level_arrays(col_names.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:13882

std::vector<std::vector<std::string>> level_arrays(index.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:16563

std::vector<std::vector<std::string>> level_arrays(n_result_levels)

std::vector<std::vector<std::string>>

pd_dataframe.h:18130

std::vector<std::vector<std::string>> level_values(nlvl)

std::vector<std::vector<std::string>>

pd_dataframe.h:15002

std::vector<std::vector<std::string>> level_values(n_levels)

std::vector<std::vector<std::string>>

pd_dataframe.h:17926

std::vector<std::vector<std::string>> levels(n_levels)

std::vector<std::vector<std::string>>

pd_dataframe.h:16881

View

std::vector<std::vector<std::string>> levels(1 + n_inner)

std::vector<std::vector<std::string>>

pd_dataframe.h:17300

View

DataFrame lt(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19399

View

DataFrame lt(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19452

View

DataFrame lt(double scalar) const

DataFrame

pd_dataframe.h:19474

View

DataFrame ne(const DataFrame& other, int axis = 1, std::optional<int> level = std::nullopt) const

DataFrame

pd_dataframe.h:19386

View

DataFrame ne(const Series<numpy::float64>& other, int axis = 1) const

DataFrame

pd_dataframe.h:19449

View

DataFrame ne(double scalar) const

DataFrame

pd_dataframe.h:19470

View

DatetimeArray new_arr(new_dt_values)

DatetimeArray

pd_dataframe.h:3880

std::vector<std::vector<std::string>> new_levels(columns_levels_.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:2837

std::vector<std::vector<std::string>> new_levels(levels.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:2886

std::vector<std::vector<std::string>> new_stacked_indices(stack_lvls.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:18526

Sorting#

Signature

Return Type

Location

Example

DataFrame rank(int axis = 0, const std::string& method = "average", bool ascending = true, const std::string& na_option = "keep", bool pct = false, bool numeric_only = false) const

DataFrame

pd_dataframe.h:15068

View

DataFrame sort_index(bool ascending) const

DataFrame

pd_dataframe.h:14908

View

DataFrame sort_index(int axis, bool ascending) const

DataFrame

pd_dataframe.h:14911

View

DataFrame sort_index(int axis = 0, std::optional<int> level = std::nullopt, bool ascending = true, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool sort_remaining = true, bool ignore_index = false, std::function<std::string(const std::string&)> key = nullptr) const

DataFrame

pd_dataframe.h:14914

View

DataFrame sort_values(const std::string& by, bool ascending, S&& na_position, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", bool ignore_index = false, const std::string& key = "") const

DataFrame

pd_dataframe.h:14619

View

DataFrame sort_values(const std::vector<std::string>& by, bool ascending, S&& na_position, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", bool ignore_index = false, const std::string& key = "") const

DataFrame

pd_dataframe.h:14631

View

DataFrame sort_values(std::initializer_list<const char*> by_init, bool ascending = true) const

DataFrame

pd_dataframe.h:14642

View

DataFrame sort_values(const std::string& by, bool ascending = true, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool ignore_index = false, const std::string& key = "") const

DataFrame

pd_dataframe.h:14648

View

DataFrame sort_values(const std::vector<std::string>& by, bool ascending, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool ignore_index = false, const std::string& key = "") const

DataFrame

pd_dataframe.h:14661

View

DataFrame sort_values(const std::vector<std::string>& by, const std::vector<bool>& ascending, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool ignore_index = false, const std::string& key = "") const

DataFrame

pd_dataframe.h:14683

View

DataFrame sort_values_by_transformed( const std::vector<std::vector<std::string>>& transformed_cols, const std::vector<bool>& ascending, const std::string& na_position = "last", bool ignore_index = false) const

DataFrame

pd_dataframe.h:14843

View

Reshaping#

Signature

Return Type

Location

Example

DataFrame T() const

DataFrame

pd_dataframe.h:1270

View

DataFrame explode(const std::string& column, bool ignore_index = false) const

DataFrame

pd_dataframe.h:17984

View

DataFrame melt(std::initializer_list<std::string> id_vars, std::initializer_list<std::string> value_vars, const std::string& var_name = "variable", const std::string& value_name = "value", std::optional<int> col_level = std::nullopt, bool ignore_index = true) const

DataFrame

pd_dataframe.h:17667

View

DataFrame melt(const std::vector<std::string>& id_vars, const std::string& var_name, const std::string& value_name) const

DataFrame

pd_dataframe.h:17679

View

DataFrame melt(const std::vector<std::string>& id_vars, const std::vector<std::string>& value_vars, const std::string& col_level_placeholder, const std::string& var_name, const std::string& value_name) const

DataFrame

pd_dataframe.h:17686

View

DataFrame melt(const std::vector<std::string>& id_vars, const std::vector<std::string>& value_vars = {}, const std::string& var_name = "variable", const std::string& value_name = "value", std::optional<int> col_level = std::nullopt, bool ignore_index = true) const

DataFrame

pd_dataframe.h:17695

View

DataFrame pivot(const std::string& columns, const std::string& index = "", const std::string& values = "") const

DataFrame

pd_dataframe.h:16099

View

static std::string pivot_result_dtype_for_agg(const std::string& aggfunc, const std::string& src_dt)

static std::string

pd_dataframe.h:16032

DataFrame pivot_table(const std::string& values, const std::string& index, const std::string& columns, const std::string& aggfunc, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:16285

View

DataFrame pivot_table(const std::string& values, const std::string& index, const std::string& columns, const std::string& aggfunc, bool margins, const std::string& margins_name = "All", bool dropna = true, bool observed = false, bool sort = true) const

DataFrame

pd_dataframe.h:16307

View

DataFrame pivot_table(const std::vector<std::string>& values, const std::vector<std::string>& index, const std::string& columns, const std::string& aggfunc, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

DataFrame

pd_dataframe.h:16328

View

DataFrame pivot_table(const std::vector<std::string>& index, const std::vector<std::string>& columns, const std::string& aggfunc) const

DataFrame

pd_dataframe.h:16347

View

DataFrame pivot_table(const std::vector<std::string>& values, const std::vector<std::string>& index, const std::vector<std::string>& columns, const std::string& aggfunc, double fill_value = std::numeric_limits<double>::quiet_NaN(), bool margins = false, const std::string& margins_name = "All", bool dropna = true, bool observed = false, bool sort = true, bool values_was_scalar = false) const

DataFrame

pd_dataframe.h:16447

View

DataFrame pivot_table_multi_agg( const std::vector<std::string>& values, const std::vector<std::string>& index, const std::vector<std::string>& columns, const std::vector<std::string>& aggfuncs, double fill_value = std::numeric_limits<double>::quiet_NaN(), bool margins = false, const std::string& margins_name = "All") const

DataFrame

pd_dataframe.h:17456

View

DataFrame pivot_table_with_margins( const std::string& values, const std::string& index, const std::string& columns, const std::string& aggfunc = "mean", double fill_value = std::numeric_limits<double>::quiet_NaN(), bool margins = false, const std::string& margins_name = "All" ) const

DataFrame

pd_dataframe.h:16982

View

DataFrame pivot_table_with_margins( const std::vector<std::string>& values, const std::vector<std::string>& index, const std::vector<std::string>& columns, const std::string& aggfunc = "mean", double fill_value = std::numeric_limits<double>::quiet_NaN(), bool margins = false, const std::string& margins_name = "All" ) const

DataFrame

pd_dataframe.h:17017

View

DataFrame squeeze() const

DataFrame

pd_dataframe.h:18091

View

DataFrame stack(std::optional<int> level = std::nullopt, bool dropna = true, bool sort = true, bool future_stack = false) const

DataFrame

pd_dataframe.h:18216

View

DataFrame stack_levels(const std::vector<int>& levels, bool dropna = true) const

DataFrame

pd_dataframe.h:18469

View

Series<numpy::float64> stack_to_series(std::optional<int> level = std::nullopt, bool dropna = true, bool sort = true, bool future_stack = false) const

Series<numpy::float64>

pd_dataframe.h:18115

DataFrame swapaxes(int axis1 = 0, int axis2 = 1, bool copy = true) const

DataFrame

pd_dataframe.h:13340

View

DataFrame transpose() const

DataFrame

pd_dataframe.h:15126

View

DataFrame unstack(int level = -1, std::optional<double> fill_value = std::nullopt, bool sort = true) const

DataFrame

pd_dataframe.h:18759

View

Combining#

Signature

Return Type

Location

Example

std::pair<DataFrame, DataFrame> align( const DataFrame& other, const std::string& join = "outer", int axis = -1, std::optional<int> level = std::nullopt, bool copy = true, double fill_value = std::numeric_limits<double>::quiet_NaN(), const std::string& method = "", std::optional<int> limit = std::nullopt, std::optional<int> fill_axis = std::nullopt, std::optional<int> broadcast_axis = std::nullopt) const

std::pair<DataFrame, DataFrame>

pd_dataframe.h:25826

View

std::pair<DataFrame, Series<T>> align_series( const Series<T>& other, const std::string& join = "outer", int axis = -1, double fill_value = std::numeric_limits<double>::quiet_NaN()) const

std::pair<DataFrame, Series<T>>

pd_dataframe.h:25916

View

DataFrame combine(const DataFrame& other, Func func, std::optional<double> fill_value = std::nullopt, bool overwrite = true) const

DataFrame

pd_dataframe.h:26038

View

DataFrame combine_first(const DataFrame& other) const

DataFrame

pd_dataframe.h:26266

View

static std::string combine_promote_dtype_(const std::string& a, const std::string& b)

static std::string

pd_dataframe.h:26032

static DataFrame concat( const std::vector<DataFrame>& objs, int axis = 0, const std::string& join = "outer", bool ignore_index = false, const std::vector<std::string>& keys = {}, [[maybe_unused]] const std::vector<std::vector<std::string>>& levels = {}, [[maybe_unused]] const std::vector<std::string>& names = {}, bool verify_integrity = false, bool sort = false, [[maybe_unused]] bool copy = true)

static DataFrame

pd_dataframe.h:24240

View

static DataFrame concat(const std::vector<DataFrame>& frames, bool ignore_index)

static DataFrame

pd_dataframe.h:24720

View

std::unique_ptr<NDFrameBase> concat_with(const NDFrameBase& other) const override

std::unique_ptr<NDFrameBase>

pd_dataframe.h:27144

DataFrame join( const DataFrame& other, const std::string& how = "left", const std::string& lsuffix = "", const std::string& rsuffix = "_r", bool sort = false, const std::string& on = "", const std::string& validate = "") const

DataFrame

pd_dataframe.h:25540

View

DataFrame merge( const DataFrame& right, const std::string& how = "inner", const std::vector<std::string>& on = {}, const std::vector<std::string>& left_on = {}, const std::vector<std::string>& right_on = {}, const std::pair<std::string, std::string>& suffixes = {"_x", "_y"}, bool left_index = false, bool right_index = false, bool sort = false, bool copy = true, const std::string& indicator = "", const std::string& validate = "", bool preserve_left_index = false) const

DataFrame

pd_dataframe.h:24739

View

DataFrame merge( const DataFrame& right, const std::string& how, const std::string& on, const std::pair<std::string, std::string>& suffixes = {"_x", "_y"}) const

DataFrame

pd_dataframe.h:25504

View

DataFrame merge( const DataFrame& right, const std::string& how, const std::vector<std::string>& on, const std::vector<std::string>& left_on, const std::vector<std::string>& right_on, const std::pair<std::string, std::string>& suffixes, bool left_index, bool right_index, bool sort, const std::string& indicator, const std::string& validate) const

DataFrame

pd_dataframe.h:25515

View

Time Series#

Signature

Return Type

Location

Example

DataFrame asfreq(const std::string& freq, const std::string& method = "", const std::string& how = "", bool normalize = false, std::optional<double> fill_value = std::nullopt) const

DataFrame

pd_dataframe.h:26750

View

Series<numpy::float64> asof(const std::string& where, const std::vector<std::string>& subset = {}) const

Series<numpy::float64>

pd_dataframe.h:27059

View

DataFrame asof(const std::vector<std::string>& where, const std::vector<std::string>& subset = {}) const

DataFrame

pd_dataframe.h:27065

View

DataFrame between_time(const std::string& start_time, const std::string& end_time, bool include_start = true, bool include_end = true, int axis = 0) const

DataFrame

pd_dataframe.h:26982

View

DataFrame between_time(const std::string& start_time, const std::string& end_time, const std::string& inclusive, int axis = 0) const

DataFrame

pd_dataframe.h:26999

View

DataFrame diff(int periods = 1, int axis = 0) const

DataFrame

pd_dataframe.h:20536

View

DataFrame pct_change(int periods = 1, const std::string& fill_method = "", std::optional<int> limit = std::nullopt, const std::string& freq = "") const

DataFrame

pd_dataframe.h:14556

View

DataFrame shift(int periods = 1, int axis = 0, std::optional<double> fill_value = std::nullopt, const std::string& freq = "", const std::string& suffix = "") const

DataFrame

pd_dataframe.h:20584

View

DataFrame to_period(const std::string& freq = "", int axis = 0, bool copy = true) const

DataFrame

pd_dataframe.h:26800

View

DataFrame to_timestamp(const std::string& freq = "", const std::string& how = "start", int axis = 0, bool copy = true) const

DataFrame

pd_dataframe.h:26841

View

DataFrame tz_convert(const std::string& tz, int axis = 0, int level = -1, bool copy = true) const

DataFrame

pd_dataframe.h:26893

View

DataFrame tz_localize(const std::string& tz, int axis = 0, int level = -1, bool copy = true, const std::string& ambiguous = "raise", const std::string& nonexistent = "raise") const

DataFrame

pd_dataframe.h:26951

View

I/O#

Signature

Return Type

Location

Example

std::string to_csv( bool include_index = true, char sep = ',', [[maybe_unused]] std::optional<size_t> chunksize = std::nullopt, const std::vector<std::string>& columns = {}, [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] const std::string& date_format = "", char decimal = '.', [[maybe_unused]] const std::string& encoding = "utf-8", const std::string& lineterminator = "\n", const std::string& na_rep = "", // Additional pandas-compatible parameters [[maybe_unused]] const std::string& path_or_buf = "", // Reserved: file path (C++ returns string) [[maybe_unused]] const std::string& mode = "w", // Reserved: file mode bool header = true, // Write column names const std::string& index_label = "", // Column label for index column(s) [[maybe_unused]] const std::string& errors = "strict", // Reserved: encoding error handling [[maybe_unused]] const std::string& storage_options = "", // Reserved: cloud storage options [[maybe_unused]] const std::string& float_format = "", // Printf-style format string for floats char quotechar = '"', // Character used to quote fields [[maybe_unused]] int quoting = 0, // Reserved: csv.QUOTE_* constant (0=MINIMAL) bool doublequote = true, // Control quoting of quotechar inside field [[maybe_unused]] const std::string& escapechar = "") const

std::string

pd_dataframe.h:27185

View

void to_gbq( const std::string& destination_table, const std::string& project_id = "", const std::string& if_exists = "fail", bool progress_bar = true, [[maybe_unused]] bool auth_local_webserver = true, [[maybe_unused]] std::optional<size_t> chunksize = std::nullopt, [[maybe_unused]] const std::string& credentials = "", [[maybe_unused]] const std::string& location = "", [[maybe_unused]] bool reauth = false, [[maybe_unused]] const std::string& table_schema = "") const

void

pd_dataframe.h:13478

View

std::string to_json( const std::string& orient = "columns", [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] const std::string& date_format = "epoch", [[maybe_unused]] const std::string& date_unit = "ms", [[maybe_unused]] const std::string& default_handler = "", [[maybe_unused]] int double_precision = 10, [[maybe_unused]] bool force_ascii = true, [[maybe_unused]] int indent = 0, [[maybe_unused]] const std::string& storage_options = "", // Additional pandas-compatible parameters [[maybe_unused]] const std::string& path_or_buf = "", // Reserved: file path (C++ returns string) bool index = true, // Include index in JSON output [[maybe_unused]] bool lines = false, // Reserved: write records per line (JSON Lines format) [[maybe_unused]] const std::string& mode = "w") const

std::string

pd_dataframe.h:27462

View

std::string to_xml( const std::string& path_or_buffer = "", // Renamed from 'path' for pandas API compatibility bool index = true, const std::string& root_name = "data", const std::string& row_name = "row", bool pretty_print = true, [[maybe_unused]] const std::vector<std::string>& attr_cols = {}, [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] const std::vector<std::string>& elem_cols = {}, const std::string& encoding = "utf-8", const std::string& na_rep = "", [[maybe_unused]] const std::map<std::string, std::string>& namespaces = {}, [[maybe_unused]] const std::string& parser = "lxml", [[maybe_unused]] const std::string& prefix = "", [[maybe_unused]] const std::string& storage_options = "", [[maybe_unused]] const std::string& stylesheet = "", bool xml_declaration = true) const

std::string

pd_dataframe.h:13519

View

Conversion#

Signature

Return Type

Location

Example

DataFrame astype(const std::string& dtype, bool copy = true, const std::string& errors = "raise") const

DataFrame

pd_dataframe.h:21972

View

DataFrame astype(const std::map<std::string, std::string>& dtype_map, bool copy = true, const std::string& errors = "raise") const

DataFrame

pd_dataframe.h:21998

View

astype(std::map<std::string, std::string>{{col_name, inner_norm}}, true, errors)

pd_dataframe.h:22160

View

DataFrame astype(const DtypeSpec& spec, bool copy = true, const std::string& errors = "raise") const

DataFrame

pd_dataframe.h:22219

View

bool bool_() const

bool

pd_dataframe.h:13217

View

DataFrame convert_dtypes( bool infer_objects = true, bool convert_string = true, bool convert_integer = true, bool convert_boolean = true, bool convert_floating = true, const std::string& dtype_backend = "numpy_nullable") const

DataFrame

pd_dataframe.h:22796

View

DataFrame copy([[maybe_unused]] bool deep = true) const

DataFrame

pd_dataframe.h:27121

View

void copy_value_from(size_t src_idx, size_t dst_idx) override

void

pd_dataframe.h:8484

DataFrame infer_objects(bool copy = true) const

DataFrame

pd_dataframe.h:22948

View

Iteration#

Signature

Return Type

Location

Example

iterator begin()

iterator

pd_dataframe.h:1984

View

const_iterator begin() const

const_iterator

pd_dataframe.h:1988

View

iterator end()

iterator

pd_dataframe.h:2000

View

const_iterator end() const

const_iterator

pd_dataframe.h:2004

View

ItemsRange items() const

ItemsRange

pd_dataframe.h:2141

View

RowsRange iterrows() const

RowsRange

pd_dataframe.h:2282

View

TuplesRange itertuples(bool index = true, const std::string& name = "Pandas") const

TuplesRange

pd_dataframe.h:2507

View

const Index<std::string>& keys() const

const Index<std::string>&

pd_dataframe.h:2029

View

Set Operations#

Signature

Return Type

Location

Example

Series<numpy::bool_> duplicated(const std::vector<std::string>& subset = {}, const std::string& keep = "first") const

Series<numpy::bool_>

pd_dataframe.h:10044

View

DataFrame isin(const std::vector<T>& values) const

DataFrame

pd_dataframe.h:8623

View

DataFrame isin(const std::map<std::string, std::vector<T>>& values) const

DataFrame

pd_dataframe.h:8693

View

Type Checking#

Signature

Return Type

Location

Example

static bool is_bool_string(const std::string& value)

static bool

pd_dataframe.h:22597

static bool is_float_actually_integer(double value)

static bool

pd_dataframe.h:22643

static bool is_float_string(const std::string& value)

static bool

pd_dataframe.h:22623

bool is_homogeneous_type() const

bool

pd_dataframe.h:1872

static bool is_integer_string(const std::string& value)

static bool

pd_dataframe.h:22605

bool is_na_at(size_t idx) const override

bool

pd_dataframe.h:8400

View

is_nullable_ext_name(source_common_override)

pd_dataframe.h:15185

static bool is_numeric_dtype(const std::string& dtype)

static bool

pd_dataframe.h:10774

View

static bool is_numeric_dtype_name(const std::string& dt)

static bool

pd_dataframe.h:11683

Other Methods#

Signature

Return Type

Location

Example

std::vector<std::vector<double>> B_mat(static_cast<int>(nvalid), std::vector<double>(n_b, 0.0))

std::vector<std::vector<double>>

pd_dataframe.h:8116

std::vector<std::vector<double>> B_mat2(static_cast<int>(nvalid), std::vector<double>(n_b2, 0.0))

std::vector<std::vector<double>>

pd_dataframe.h:8155

std::vector<std::vector<double>> B_mat2(static_cast<int>(nvalid), std::vector<double>(n_b2, 0.0))

std::vector<std::vector<double>>

pd_dataframe.h:8171

PD_TN_TRY(numpy::float64)

pd_dataframe.h:13430

PD_TN_TRY(numpy::float32)

pd_dataframe.h:13431

PD_TN_TRY(numpy::int64)

pd_dataframe.h:13432

PD_TN_TRY(numpy::int32)

pd_dataframe.h:13433

PD_TN_TRY(numpy::int16)

pd_dataframe.h:13434

PD_TN_TRY(numpy::int8)

pd_dataframe.h:13435

PD_TN_TRY(numpy::uint64)

pd_dataframe.h:13436

PD_TN_TRY(numpy::uint32)

pd_dataframe.h:13437

PD_TN_TRY(numpy::uint16)

pd_dataframe.h:13438

PD_TN_TRY(numpy::uint8)

pd_dataframe.h:13439

PD_TN_TRY(numpy::bool_)

pd_dataframe.h:13440

std::vector<std::vector<double>> VtV(pn, std::vector<double>(pn, 0.0))

std::vector<std::vector<double>>

pd_dataframe.h:8133

DataFrame abs() const

DataFrame

pd_dataframe.h:20051

View

bool all(int axis = 0, bool bool_only = false, bool skipna = true) const override

bool

pd_dataframe.h:23947

View

Series<numpy::bool_> all_axis(int axis = 0, bool skipna = true) const

Series<numpy::bool_>

pd_dataframe.h:23984

View

bool all_columns_timedelta() const

bool

pd_dataframe.h:727

View

bool any(int axis = 0, bool bool_only = false, bool skipna = true) const override

bool

pd_dataframe.h:23964

View

Series<numpy::bool_> any_axis(int axis = 0, bool skipna = true) const

Series<numpy::bool_>

pd_dataframe.h:24114

View

DatetimeArray arr(dt_vals)

DatetimeArray

pd_dataframe.h:5517

View

static DataFrame assemble_multi_series_dataframe( std::vector<std::unique_ptr<NDFrameBase>> cols, const std::vector<std::string>& names, std::unique_ptr<IndexBase> index_clone)

static DataFrame

pd_dataframe.h:2709

View

Series<numpy::float64> assemble_scalar_series( const std::vector<numpy::float64>& values, const std::vector<std::string>& col_names) const

Series<numpy::float64>

pd_dataframe.h:2668

View

Series<std::string> assemble_scalar_string_series( const std::vector<std::string>& values, const std::vector<std::string>& col_names) const

Series<std::string>

pd_dataframe.h:2691

View

std::vector<const IndexBase*> axes() const override

std::vector<const IndexBase*>

pd_dataframe.h:1561

View

DataFrame broadcast_series_to_bool_dataframe(const Series<numpy::float64>& condition, int axis) const

DataFrame

pd_dataframe.h:8908

View

Axis1ColumnPlan build_axis1_plan(bool numeric_only) const

Axis1ColumnPlan

pd_dataframe.h:11695

else build_dt64()

else

pd_dataframe.h:6353

build_float(float{})

pd_dataframe.h:6388

build_float(double{})

pd_dataframe.h:6392

MultiIndexColumnLabel build_multiindex_column_label(size_t col_idx) const

MultiIndexColumnLabel

pd_dataframe.h:3064

build_native_int(int64_t{})

pd_dataframe.h:6372

build_native_int(int32_t{})

pd_dataframe.h:6374

build_native_int(int16_t{})

pd_dataframe.h:6376

build_native_int(int8_t{})

pd_dataframe.h:6378

build_native_int(uint64_t{})

pd_dataframe.h:6380

build_native_int(uint32_t{})

pd_dataframe.h:6382

build_native_int(uint16_t{})

pd_dataframe.h:6384

build_native_int(uint8_t{})

pd_dataframe.h:6386

else build_td64()

else

pd_dataframe.h:6357

build_with_dtype(bool{}, "bool")

pd_dataframe.h:21175

build_with_dtype(int64_t{}, "int64")

pd_dataframe.h:21177

build_with_dtype(uint64_t{}, "uint64")

pd_dataframe.h:21179

build_with_dtype(float{}, "float32")

pd_dataframe.h:21181

build_with_dtype(double{}, "float64")

pd_dataframe.h:21246

build_with_dtype(double{}, "float64")

pd_dataframe.h:21249

build_with_dtype(bool{}, "bool")

pd_dataframe.h:21544

build_with_dtype(int64_t{}, "int64")

pd_dataframe.h:21546

build_with_dtype(uint64_t{}, "uint64")

pd_dataframe.h:21548

build_with_dtype(float{}, "float32")

pd_dataframe.h:21550

build_with_dtype(double{}, "float64")

pd_dataframe.h:21611

build_with_dtype(double{}, "float64")

pd_dataframe.h:21613

std::vector<std::map<std::string, int>> cat_code_maps(num_sort_cols)

std::vector<std::map<std::string, int>>

pd_dataframe.h:14702

const_iterator cbegin() const

const_iterator

pd_dataframe.h:1992

View

DataFrame ceil() const

DataFrame

pd_dataframe.h:20044

View

const_iterator cend() const

const_iterator

pd_dataframe.h:2008

View

ColumnAccessType classify_column_access(const std::string& key) const

ColumnAccessType

pd_dataframe.h:3311

View

std::vector<AggColumnInfo> classify_column_dtypes(bool skipna = true, bool numeric_only = false) const

std::vector<AggColumnInfo>

pd_dataframe.h:11715

View

void clear_columns_levels()

void

pd_dataframe.h:1498

void clear_multiindex()

void

pd_dataframe.h:1361

void clear_reduced_series_repr()

void

pd_dataframe.h:1412

DataFrame clip(double lower, double upper, std::optional<int> axis = std::nullopt, bool inplace = false) const

DataFrame

pd_dataframe.h:20063

View

std::unique_ptr<NDFrameBase> clone() const override

std::unique_ptr<NDFrameBase>

pd_dataframe.h:27129

View

Series<T>& col(const std::string& name)

Series<T>&

pd_dataframe.h:3247

View

const Series<T>& col(const std::string& name) const

const Series<T>&

pd_dataframe.h:3261

View

std::vector<std::vector<numpy::float64>> col_data(ncols())

std::vector<std::vector<numpy::float64>>

pd_dataframe.h:20170

View

std::vector<std::vector<numpy::float64>> col_data(ncols())

std::vector<std::vector<numpy::float64>>

pd_dataframe.h:20271

View

std::vector<std::vector<numpy::float64>> col_data(ncols())

std::vector<std::vector<numpy::float64>>

pd_dataframe.h:20384

View

std::vector<std::vector<numpy::float64>> col_data(ncols())

std::vector<std::vector<numpy::float64>>

pd_dataframe.h:20497

View

const Series<T>& column(const std::string& name) const

const Series<T>&

pd_dataframe.h:3275

View

Series<T>& column(const std::string& name)

Series<T>&

pd_dataframe.h:3277

View

const NDFrameBase& column_at(size_t idx) const

const NDFrameBase&

pd_dataframe.h:2518

View

std::string column_dtype_override(const std::string& col_name) const

std::string

pd_dataframe.h:1593

View

Series<numpy::float64> column_to_series_f64(size_t col_idx) const

Series<numpy::float64>

pd_dataframe.h:2527

View

Series<std::string> column_to_series_str(size_t col_idx) const

Series<std::string>

pd_dataframe.h:2547

View

std::unique_ptr<NDFrameBase> column_to_typed_series(size_t c) const

std::unique_ptr<NDFrameBase>

pd_dataframe.h:2641

View

static bool column_values_are_integers(const NDFrameBase* col)

static bool

pd_dataframe.h:22454

const Index<std::string>& columns() const

const Index<std::string>&

pd_dataframe.h:1277

View

const std::string& columns_index_dtype() const

const std::string&

pd_dataframe.h:1397

View

const std::vector<std::string>& columns_level_names() const

const std::vector<std::string>&

pd_dataframe.h:1530

View

const std::vector<std::vector<std::string>>& columns_levels() const

const std::vector<std::vector<std::string>>&

pd_dataframe.h:1516

View

void columns_name(const std::string& name)

void

pd_dataframe.h:1385

View

const std::vector<std::vector<std::string>>& columns_original_levels() const

const std::vector<std::vector<std::string>>&

pd_dataframe.h:1523

std::string compute_axis1_source_common_dtype_() const

std::string

pd_dataframe.h:20721

std::string compute_axis1_source_common_dtype_filtered_( const std::vector<size_t>& keep_indices) const

std::string

pd_dataframe.h:12222

static double compute_named_stat(const std::vector<double>& vals, const std::string& func_name)

static double

pd_dataframe.h:23657

StringColumnStats compute_string_column_stats(size_t col_idx) const

StringColumnStats

pd_dataframe.h:10826

View

static numpy::bool_ convert_to_bool(const std::string& value, const std::string& source_dtype, const std::string& col_name, size_t row_idx)

static numpy::bool_

pd_dataframe.h:22397

static numpy::float64 convert_to_float64(const std::string& value, const std::string& source_dtype, const std::string& col_name, size_t row_idx)

static numpy::float64

pd_dataframe.h:22363

static numpy::int64 convert_to_int64(const std::string& value, const std::string& source_dtype, const std::string& col_name, size_t row_idx)

static numpy::int64

pd_dataframe.h:22335

DataFrame corr(const std::string& method = "pearson", std::optional<int> min_periods = std::nullopt, bool numeric_only = false) const

DataFrame

pd_dataframe.h:13990

View

Series<numpy::float64> corrwith(const DataFrame& other, int axis = 0, bool drop = false, const std::string& method = "pearson", bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:14102

View

Series<numpy::float64> corrwith(const Series<T>& other, int axis = 0, bool drop = false, const std::string& method = "pearson", bool numeric_only = false) const

Series<numpy::float64>

pd_dataframe.h:14398

View

DataFrame cov(std::optional<int> min_periods = std::nullopt, int ddof = 1, bool numeric_only = false) const

DataFrame

pd_dataframe.h:14044

View

static std::string determine_int_dtype(const std::vector<AggColumnInfo>& infos)

static std::string

pd_dataframe.h:11797

numpy::datetime64 dt(l)

numpy::datetime64

pd_dataframe.h:5604

View

std::string dtype_name() const override

std::string

pd_dataframe.h:1234

View

std::string dtype_string(const std::string& col_name) const

std::string

pd_dataframe.h:1606

View

Series<std::string> dtypes() const

Series<std::string>

pd_dataframe.h:1568

View

emit_index_key(oss, r)

pd_dataframe.h:27621

emit_index_key(oss, r)

pd_dataframe.h:27647

emit_value(oss, c, r)

pd_dataframe.h:27623

emit_value(oss, c, r)

pd_dataframe.h:27637

emit_value(oss, c, r)

pd_dataframe.h:27652

emit_value(oss, c, r)

pd_dataframe.h:27665

emit_value(oss, c, r)

pd_dataframe.h:27688

emit_value(oss, c, r)

pd_dataframe.h:27782

bool empty() const override

bool

pd_dataframe.h:1196

View

std::vector<std::vector<std::string>> empty_levels(columns_levels_.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:2979

static std::string escape_xml(const std::string& str)

static std::string

pd_dataframe.h:13598

std::variant<Series<numpy::float64>, DataFrame, std::monostate> eval( const std::string& expr, bool inplace = false, std::optional<std::map<std::string, double>> local_dict = std::nullopt, std::optional<std::map<std::string, double>> global_dict = std::nullopt, std::optional<std::vector<std::string>> resolvers = std::nullopt, int level = 0, std::optional<std::string> target = std::nullopt, const std::string& parser = "pandas", const std::string& engine = "")

std::variant<Series<numpy::float64>, DataFrame, std::monostate>

pd_dataframe.h:9786

View

std::vector<double> eval_expression(const std::string& expr) const

std::vector<double>

pd_dataframe.h:9907

double eval_postfix(const std::vector<eval_internal::EvalToken>& postfix, size_t row) const

double

pd_dataframe.h:9929

bool evaluate_column_comparison(const query::ColumnComparisonNode* cc, size_t row) const

bool

pd_dataframe.h:9653

bool evaluate_comparison(const query::ComparisonNode* comp, size_t row) const

bool

pd_dataframe.h:9623

bool evaluate_membership(const query::MembershipNode* mem, size_t row) const

bool

pd_dataframe.h:9677

bool evaluate_query_node(const query::ASTNode* node, size_t row) const

bool

pd_dataframe.h:9576

evaluate_query_node(and_node->right.get(), row)

pd_dataframe.h:9602

evaluate_query_node(or_node->right.get(), row)

pd_dataframe.h:9607

Series<numpy::float64> extract_column_as_numeric_series(const std::string& key) const

Series<numpy::float64>

pd_dataframe.h:3514

View

std::unique_ptr<NDFrameBase> extract_column_as_series_base( const std::string& key) const

std::unique_ptr<NDFrameBase>

pd_dataframe.h:3556

RowData extract_row_typed(size_t r) const

RowData

pd_dataframe.h:2605

View

std::ofstream file(path_or_buffer)

std::ofstream

pd_dataframe.h:13582

View

DataFrame filter(const std::vector<std::string>& items = {}, const std::string& like = "", const std::string& regex = "", int axis = 1) const

DataFrame

pd_dataframe.h:13241

View

DataFrame filter_items(const std::vector<std::string>& items) const

DataFrame

pd_dataframe.h:4225

DataFrame filter_like(const std::string& like) const

DataFrame

pd_dataframe.h:4232

int find_column_by_datetime(int64_t ns_val) const

int

pd_dataframe.h:1421

static std::string find_smallest_int_dtype(double min_val, double max_val, bool prefer_signed)

static std::string

pd_dataframe.h:22433

const Flags& flags() const override

const Flags&

pd_dataframe.h:1846

View

Flags& flags_mutable()

Flags&

pd_dataframe.h:1853

DataFrame floor() const

DataFrame

pd_dataframe.h:20037

View

static std::vector<std::string> format_percentile_labels(const std::vector<double>& pcts)

static std::vector<std::string>

pd_dataframe.h:10849

View

const std::string& frame_dtype_override() const

const std::string&

pd_dataframe.h:21721

View

bool has_column(const std::string& name) const

bool

pd_dataframe.h:3293

View

bool has_columns_level_categories() const

bool

pd_dataframe.h:1551

bool has_columns_levels() const

bool

pd_dataframe.h:1509

View

bool has_multiindex() const

bool

pd_dataframe.h:1344

View

bool hasnans() const override

bool

pd_dataframe.h:7445

View

const IndexBase& index() const override

const IndexBase&

pd_dataframe.h:1284

View

IndexBase& index_mutable()

IndexBase&

pd_dataframe.h:1291

void index_name(const std::string& name)

void

pd_dataframe.h:4797

View

std::optional<std::string> index_name() const

std::optional<std::string>

pd_dataframe.h:4800

View

static std::string infer_best_dtype( const std::vector<std::string>& str_values, const std::string& source_dtype, bool convert_string, bool convert_integer, bool convert_boolean, bool convert_floating)

static std::string

pd_dataframe.h:22658

std::vector<std::vector<std::string>> inner(levels.begin() + 1, levels.end())

std::vector<std::vector<std::string>>

pd_dataframe.h:16419

std::vector<std::vector<std::string>> inner_orig(inner.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:16421

void isetitem(size_t loc, const std::vector<T>& value)

void

pd_dataframe.h:13320

View

std::istringstream iss(eval_expr)

std::istringstream

pd_dataframe.h:9819

View

std::istringstream iss(label)

std::istringstream

pd_dataframe.h:15557

View

std::istringstream iss(label)

std::istringstream

pd_dataframe.h:15592

View

std::istringstream iss(key)

std::istringstream

pd_dataframe.h:16565

View

std::istringstream iss(col_composite)

std::istringstream

pd_dataframe.h:16905

View

std::istringstream iss(idx_val)

std::istringstream

pd_dataframe.h:16936

View

std::istringstream iss(cn)

std::istringstream

pd_dataframe.h:17309

View

std::vector<std::vector<std::string>> kept_stacked(stack_lvls.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:18572

MultiIndexMatchResult match_multiindex_columns(const std::vector<std::string>& keys) const

MultiIndexMatchResult

pd_dataframe.h:2937

View

std::vector<std::vector<std::string>> mi_arrays(index.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:16931

std::vector<std::vector<std::string>> mi_arrays(nlevels)

std::vector<std::vector<std::string>>

pd_dataframe.h:17222

size_t nbytes() const override

size_t

pd_dataframe.h:1222

View

size_t ncols() const

size_t

pd_dataframe.h:1263

View

size_t ndim() const override

size_t

pd_dataframe.h:1215

View

static std::vector<double> normalize_percentiles(const std::vector<double>& percentiles)

static std::vector<double>

pd_dataframe.h:10737

size_t nrows() const

size_t

pd_dataframe.h:1256

View

numeric_aware_sort(unique_index_vals)

pd_dataframe.h:16637

numeric_aware_sort(unique_col_vals)

pd_dataframe.h:16638

query::QueryExpression parsed(expr)

query::QueryExpression

pd_dataframe.h:9489

propagate_index_to_series(s)

pd_dataframe.h:3418

propagate_index_to_series(s)

pd_dataframe.h:3437

propagate_index_to_series(s)

pd_dataframe.h:3485

propagate_index_to_series(s)

pd_dataframe.h:3506

propagate_index_to_series(s)

pd_dataframe.h:3522

void propagate_index_to_series(Series<T>& s) const

void

pd_dataframe.h:3608

propagate_index_to_series(result)

pd_dataframe.h:3635

oss << quote_field(columns_level_names_[lvl])

oss <<

pd_dataframe.h:27293

oss << quote_field(columns_levels_[lvl][col_indices[i]])

oss <<

pd_dataframe.h:27299

oss << quote_field(mi_names[idx_lvl].value())

oss <<

pd_dataframe.h:27320

oss << quote_field(resolve_idx_label())

oss <<

pd_dataframe.h:27323

oss << quote_field(mi_names[idx_lvl].value())

oss <<

pd_dataframe.h:27338

oss << quote_field(resolve_idx_label()) << sep

oss <<

pd_dataframe.h:27345

oss << quote_field(columns_index_.get_value_str(col_indices[i]))

oss <<

pd_dataframe.h:27350

oss << quote_field(tup[lvl]) << sep

oss <<

pd_dataframe.h:27363

oss << quote_field(index_->get_value_str(r)) << sep

oss <<

pd_dataframe.h:27366

oss << quote_field(na_rep)

oss <<

pd_dataframe.h:27384

oss << quote_field(apply_decimal(val))

oss <<

pd_dataframe.h:27386

oss << quote_field(apply_decimal(std::string(buf)))

oss <<

pd_dataframe.h:27398

oss << quote_field(apply_decimal(val_str))

oss <<

pd_dataframe.h:27431

oss << quote_field(na_rep)

oss <<

pd_dataframe.h:27437

oss << quote_field(apply_decimal(val))

oss <<

pd_dataframe.h:27439

std::regex re(regex)

std::regex

pd_dataframe.h:13253

std::regex re(regex)

std::regex

pd_dataframe.h:13280

int resolve_column_level(int level_int) const

int

pd_dataframe.h:18436

View

int resolve_column_level(const std::string& level_name) const

int

pd_dataframe.h:18444

View

size_t resolve_multiindex_exact_column(const std::vector<std::string>& key) const

size_t

pd_dataframe.h:3038

MultiIndexSortResolution resolve_sort_columns_multiindex( const std::vector<std::string>& by_cols) const

MultiIndexSortResolution

pd_dataframe.h:14799

View

DataFrame round(int decimals = 0) const

DataFrame

pd_dataframe.h:19987

View

std::vector<numpy::object_> row_to_objects(size_t row) const

std::vector<numpy::object_>

pd_dataframe.h:3841

std::vector<double> row_values_f64(size_t row_idx) const

std::vector<double>

pd_dataframe.h:2564

View

pandas::Series<T> s(casted)

pandas::Series<T>

pd_dataframe.h:21157

View

pandas::Series<T> s(casted)

pandas::Series<T>

pd_dataframe.h:21526

View

std::string s(buf)

std::string

pd_dataframe.h:27537

View

std::string s(buf)

std::string

pd_dataframe.h:27566

View

pandas::Series<std::complex<double>> s_cx(scalar_vals_cx)

pandas::Series<std::complex<double>>

pd_dataframe.h:20938

pandas::Series<std::complex<double>> s_cx(scalar_vals_cx)

pandas::Series<std::complex<double>>

pd_dataframe.h:21068

pandas::Series<numpy::datetime64> sdt(dt_vals)

pandas::Series<numpy::datetime64>

pd_dataframe.h:21202

pandas::Series<numpy::datetime64> sdt(dt_vals)

pandas::Series<numpy::datetime64>

pd_dataframe.h:21568

DataFrame select_columns_by_names(const std::vector<std::string>& names, bool multiindex_first_level_match = true) const

DataFrame

pd_dataframe.h:2970

DataFrame select_dtypes(const std::vector<std::string>& include = {}, const std::vector<std::string>& exclude = {}) const

DataFrame

pd_dataframe.h:7315

View

DataFrame select_multiindex_columns(const std::vector<std::string>& first_level_keys) const

DataFrame

pd_dataframe.h:2870

View

DataFrame select_multiindex_partial(const std::vector<std::string>& key) const

DataFrame

pd_dataframe.h:3088

void set_attrs(const Attrs& attrs) override

void

pd_dataframe.h:1839

void set_column(const std::string& name, const Series<T>& series)

void

pd_dataframe.h:6890

View

void set_column_cat_categories(const std::string& col_name, const std::vector<std::string>& cats)

void

pd_dataframe.h:1750

View

void set_column_cat_ordered(const std::string& col_name, bool ordered)

void

pd_dataframe.h:1760

View

void set_column_datetime_array(const std::string& col_name, std::shared_ptr<pandas::DatetimeArray> ea)

void

pd_dataframe.h:1679

set_column_datetime_array(col_name, ea)

pd_dataframe.h:1744

void set_column_datetime_ea(const std::string& col_name, const std::vector<std::optional<pandas::Timestamp>>& ts_vec, const std::string& uniform_tz = "")

void

pd_dataframe.h:1739

View

void set_column_dtype(const std::string& col_name, const std::string& dtype_str)

void

pd_dataframe.h:1622

View

set_column_dtype(col_name, dt)

pd_dataframe.h:26410

View

void set_column_mask(const std::string& col_name, const numpy::NDArray<numpy::bool_>& mask)

void

pd_dataframe.h:1640

void set_column_mixed_tz_array(const std::string& col_name, std::shared_ptr<pandas::MixedTzDatetimeArray> ea)

void

pd_dataframe.h:1712

DataFrame& set_column_names(const std::vector<std::string>& new_names)

DataFrame&

pd_dataframe.h:5247

View

void set_column_resolved( const std::string& name, const ValueKindRange& kinds, const pandas::DtypeOverride& override_ = {})

void

pd_dataframe.h:7061

View

void set_column_sparse_fill_value(const std::string& col_name, double fv, const std::string& inner_name = "")

void

pd_dataframe.h:1775

View

void set_columns_index_dtype(const std::string& dtype)

void

pd_dataframe.h:1390

View

void set_columns_level_categories(std::vector<std::vector<std::string>> categories, std::vector<bool> ordered)

void

pd_dataframe.h:1489

void set_columns_levels(std::vector<std::vector<std::string>> levels, std::vector<std::string> names, std::vector<std::vector<std::string>> original_levels = {})

void

pd_dataframe.h:1465

View

void set_columns_name(const std::string& name)

void

pd_dataframe.h:1378

View

void set_datetime_index(const std::string& start, int periods, const std::string& freq)

void

pd_dataframe.h:4832

View

void set_display_options(size_t width, size_t max_colwidth, size_t max_rows = 0, bool multi_sparse = true, size_t max_cols = 0, bool width_truncation = false)

void

pd_dataframe.h:1439

View

void set_flags(const Flags& flags, bool copy = true, bool allows_duplicate_labels = true) override

void

pd_dataframe.h:1863

View

void set_frame_dtype_override(const std::string& v)

void

pd_dataframe.h:21722

void set_integer_index(std::initializer_list<int64_t> values)

void

pd_dataframe.h:4773

View

void set_integer_index(std::initializer_list<int> values)

void

pd_dataframe.h:4776

View

void set_multi_index(std::initializer_list<std::initializer_list<const char*>> levels, const std::vector<std::optional<std::string>>& names = {})

void

pd_dataframe.h:4808

View

void set_multi_index(const std::vector<std::vector<std::string>>& levels, const std::vector<std::optional<std::string>>& names = {})

void

pd_dataframe.h:4820

View

void set_multiindex(MultiIndex mi)

void

pd_dataframe.h:1298

View

void set_multiindex(std::initializer_list<std::initializer_list<const char*>> levels, const std::vector<std::optional<std::string>>& names = {})

void

pd_dataframe.h:1319

View

set_multiindex(MultiIndex::from_arrays<std::string>(arrays, names))

pd_dataframe.h:1328

View

void set_multiindex(const std::vector<std::vector<std::string>>& levels, const std::vector<std::optional<std::string>>& names = {})

void

pd_dataframe.h:1334

View

set_multiindex(MultiIndex::from_arrays<std::string>(levels, names))

pd_dataframe.h:1338

View

set_multiindex(std::move(mi))

pd_dataframe.h:4694

View

set_multiindex(MultiIndex::from_arrays<std::string>(arrays, names))

pd_dataframe.h:4817

View

set_multiindex(MultiIndex::from_arrays<std::string>(levels, names))

pd_dataframe.h:4823

View

void set_multiindex_names(const std::vector<std::optional<std::string>>& names)

void

pd_dataframe.h:1368

View

void set_nan_marker(const std::string& col_name, std::initializer_list<bool> mask_init)

void

pd_dataframe.h:4854

View

void set_option(const std::string& key, bool value)

void

pd_dataframe.h:1453

View

void set_reduced_series_repr(const std::string& repr)

void

pd_dataframe.h:1407

void set_string_index(std::initializer_list<const char*> values)

void

pd_dataframe.h:4770

View

static size_t setw_byte_target(const std::string& val, size_t target_codepoint_width)

static size_t

pd_dataframe.h:10816

std::vector<size_t> shape() const override

std::vector<size_t>

pd_dataframe.h:1203

View

std::pair<size_t, size_t> shape_2d() const { return {nrows(), ncols()}; }

std::pair<size_t, size_t>

pd_dataframe.h:1210

View

size_t size() const override

size_t

pd_dataframe.h:1189

View

std::vector<std::vector<std::string>> sliced_arrays(mi.nlevels())

std::vector<std::vector<std::string>>

pd_dataframe.h:3943

std::vector<std::vector<std::string>> sorted_levels(columns_levels_.size())

std::vector<std::vector<std::string>>

pd_dataframe.h:14978

DataFrameSparseAccessor sparse() const

DataFrameSparseAccessor

pd_dataframe.h:1899

View

std::stringstream ss(to_split)

std::stringstream

pd_dataframe.h:18002

View

static int64_t timedelta_unit_factor(const std::string& dt)

static int64_t

pd_dataframe.h:7078

trim(inner)

pd_dataframe.h:22149

View

trim(fill_spelled)

pd_dataframe.h:22150

View

DataFrame truncate(const std::optional<std::string>& before = std::nullopt, const std::optional<std::string>& after = std::nullopt, int axis = 0, bool copy = true) const

DataFrame

pd_dataframe.h:27012

View

pandas::Timestamp ts(columns_index_[i])

pandas::Timestamp

pd_dataframe.h:1428

View

static size_t unicode_display_width(const std::string& s)

static size_t

pd_dataframe.h:10798

numpy::NDArray<numpy::float64> values() const

numpy::NDArray<numpy::float64>

pd_dataframe.h:1810

View

Code Examples#

The following examples are extracted from the test suite.

DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
DataFrame (pd_test_1_all.cpp:22011)
22001        void pd_test_where_basic() {
22002            std::cout << "========= where basic functionality =======================";
22003
22004            // Create DataFrame
22005            std::map<std::string, std::vector<double>> data;
22006            data["A"] = {1.0, 2.0, 3.0, 4.0};
22007            data["B"] = {5.0, 6.0, 7.0, 8.0};
22008            pandas::DataFrame df(data);
22009
22010            // Create condition DataFrame (values > 2)
22011            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22012            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22013            cond_data["B"] = {true, true, true, true};     // all >2
22014            pandas::DataFrame cond(cond_data);
22015
22016            // Apply where with replacement value -1
22017            pandas::DataFrame result = df.where(cond, -1.0);
22018
22019            // Get column index for A - it's sorted alphabetically in std::map
22020            size_t col_a_idx = df.get_column_index("A");
at (pd_test_1_all.cpp:6581)
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
6576                pandas::DataFrame df_na(float_data);
6577
6578                auto na_mask = df_na.isna();
6579                // Row 1, col 0 (X) should be NA
6580                if (!na_mask.getElementAt({1, 0})) {
6581                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583                }
6584                // Row 2, col 1 (Y) should be NA
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
at_time (pd_test_2_all.cpp:728)
718        std::cout << "====================================== [OK] pd_test_asof test suite ========================== " << std::endl;
719        return 0;
720    }
721
722} // namespace dataframe_tests
723// ------------------- pd_test_asof.cpp (end) -----------------------------
724
725// ------------------- pd_test_at_time.cpp (start) -----------------------------
726// dataframe_tests/pd_test_at_time.cpp
727// Tests for DataFrame.at_time() method (pandas 2.0+ API)
728// Selects values at particular time of day from datetime-indexed DataFrame
729#include <iostream>
730#include <stdexcept>
731#include <vector>
732#include <string>
733#include <map>
734#include "../pandas/pd_dataframe.h"
735
736// CRITICAL: No using namespace directives
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
attrs (pd_test_1_all.cpp:16361)
16351        // =====================================================================
16352        // Series Attrs Integration Tests
16353        // =====================================================================
16354
16355        void pd_test_ndframe_series_attrs() {
16356            std::cout << "========= series attrs integration =============================" << std::endl;
16357
16358            pandas::Series<double> s({1.0, 2.0, 3.0});
16359
16360            // Test setting attrs on Series
16361            s.attrs().set("source", std::string("test_data"));
16362            s.attrs().set("timestamp", 1234567890);
16363
16364            bool passed = s.attrs().get<std::string>("source") == "test_data";
16365            if (!passed) {
16366                std::cout << "  [FAIL] : in pd_test_ndframe_series_attrs() : set/get source" << std::endl;
16367                throw std::runtime_error("pd_test_ndframe_series_attrs failed: set/get source");
16368            }
16369
16370            passed = s.attrs().get<int>("timestamp") == 1234567890;
16371            if (!passed) {
first (pd_test_1_all.cpp:11616)
11606        void pd_test_groupby_first_last() {
11607            std::cout << "========= GroupBy first/last ====================";
11608
11609            std::map<std::string, std::vector<double>> data = {
11610                {"category", {1.0, 1.0, 2.0, 2.0}},
11611                {"value", {10.0, 20.0, 30.0, 40.0}}
11612            };
11613            pandas::DataFrame df(data);
11614
11615            auto first_result = df.groupby("category").first();
11616            auto last_result = df.groupby("category").last();
11617
11618            // First for group 1: 10, group 2: 30
11619            // Last for group 1: 20, group 2: 40
11620            double first1 = std::stod(first_result["value"].get_value_str(0));
11621            double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625            if (!passed) {
first_valid_index (pd_test_1_all.cpp:20555)
20545            std::vector<double> values = {
20546                std::numeric_limits<double>::quiet_NaN(),
20547                std::numeric_limits<double>::quiet_NaN(),
20548                3.0,
20549                4.0,
20550                5.0
20551            };
20552            pandas::Series<double> s(values, "test");
20553
20554            auto first_idx = s.first_valid_index();
20555
20556            bool passed = first_idx.has_value() && first_idx.value() == 2;
20557
20558            if (!passed) {
20559                std::cout << "  [FAIL] : in pd_test_timeseries_first_valid_index() : expected index 2" << std::endl;
20560                throw std::runtime_error("pd_test_timeseries_first_valid_index failed");
20561            }
20562
20563            std::cout << " -> tests passed" << std::endl;
20564        }
get (pd_test_1_all.cpp:10290)
10280void pd_test_extension_index_get_loc_unique() {
10281    std::cout << "========= get_loc (unique) =========================";
10282
10283    pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284    pandas::CategoricalIndex idx(arr);
10285
10286    auto loc_apple = idx.get_loc("apple");
10287    auto loc_banana = idx.get_loc("banana");
10288    auto loc_cherry = idx.get_loc("cherry");
10289
10290    bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291                   std::get<size_t>(loc_banana) == 1 &&
10292                   std::get<size_t>(loc_cherry) == 2);
10293    if (!passed) {
10294        std::cout << "  [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295        throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296    }
10297
10298    std::cout << " -> tests passed" << std::endl;
10299}
get_column_as_series (pd_test_5_all.cpp:123545)
123535    auto tuples = mi.to_list();
123536    if (tuples.size() != nrows) {
123537        throw std::runtime_error(
123538            "format_extractall: MultiIndex size " + std::to_string(tuples.size()) +
123539            " != DataFrame nrows " + std::to_string(nrows));
123540    }
123541    // Pre-extract each column's string values to avoid repeated lookups.
123542    std::vector<std::vector<std::string>> col_values;
123543    col_values.reserve(ncols);
123544    for (size_t c = 0; c < ncols; ++c) {
123545        auto s = df.template get_column_as_series<std::string>(c);
123546        std::vector<std::string> vals;
123547        vals.reserve(s.size());
123548        for (size_t r = 0; r < s.size(); ++r) vals.push_back(s.at(r));
123549        col_values.push_back(std::move(vals));
123550    }
123551    for (size_t r = 0; r < nrows; ++r) {
123552        oss << "\n";
123553        // tuples[r] is a vector<string> of length 2.
123554        oss << tuples[r][0] << "," << tuples[r][1];
123555        for (size_t c = 0; c < ncols; ++c) {
get_column_as_string_series (pd_test_2_all.cpp:20370)
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
get_column_index (pd_test_1_all.cpp:22021)
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029            std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030            std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031            std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
get_dummies (pd_test_3_all.cpp:13545)
13535    }
13536
13537    std::cout << " -> tests passed" << std::endl;
13538}
13539
13540// ============================================================================
13541// Get Dummies / From Dummies Tests
13542// ============================================================================
13543
13544void pd_test_top_level_get_dummies() {
13545    std::cout << "========= get_dummies() ===============================";
13546
13547    std::vector<std::string> data = {"A", "B", "A", "C", "B", "A"};
13548    pandas::Series<std::string> s(data, "category");
13549
13550    pandas::DataFrame result = pandas::get_dummies(s);
13551
13552    // Should have columns for A, B, C
13553    if (result.ncols() != 3) {
13554        std::cout << "  [FAIL] : in pd_test_top_level_get_dummies() : expected 3 columns" << std::endl;
13555        throw std::runtime_error("pd_test_top_level_get_dummies failed: wrong column count");
get_duplicate_columns (pd_test_2_all.cpp:20352)
20342}
20343
20344void pd_test_getitem_dispatch_get_duplicates() {
20345    std::cout << "pd_test_getitem_dispatch_get_duplicates" << std::endl;
20346    pandas::DataFrame df;
20347    std::vector<numpy::float64> v1 = {1.0, 2.0, 3.0};
20348    std::vector<numpy::float64> v2 = {4.0, 5.0, 6.0};
20349    df.insert(0, "col", std::make_unique<pandas::Series<numpy::float64>>(v1, "col"), true);
20350    df.insert(1, "col", std::make_unique<pandas::Series<numpy::float64>>(v2, "col"), true);
20351
20352    auto sub = df.get_duplicate_columns("col");
20353    check(sub.ncols() == 2, "dup ncols");
20354    check(sub.nrows() == 3, "dup nrows");
20355    // Both columns named "col"
20356    check(sub.columns().get_value_str(0) == "col", "dup col0 name");
20357    check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
get_optional (pd_test_1_all.cpp:6741)
6731                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex cols");
6732                }
6733            }
6734
6735            // Test get_optional
6736            {
6737                std::map<std::string, std::vector<int>> data;
6738                data["A"] = {1, 2, 3};
6739                pandas::DataFrame df(data);
6740
6741                auto col_opt = df.get_optional("A");
6742                if (!col_opt.has_value()) {
6743                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : get_optional A should exist" << std::endl;
6744                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional A");
6745                }
6746
6747                auto missing = df.get_optional("Z");
6748                if (missing.has_value()) {
6749                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : get_optional Z should not exist" << std::endl;
6750                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751                }
get_series (pd_test_5_all.cpp:12970)
12960        pandas_tests::check(!threw, "query_bool_and_numeric.no_throw", local_fail);
12961        if (!threw) {
12962            pandas_tests::check(result.nrows() == 1, "query_bool_and_numeric.nrows == 1 (got " + std::to_string(result.nrows()) + ")", local_fail);
12963        }
12964    }
12965
12966    // === xs_level tests (Error 2) ===
12967    // Note: xs_level() doesn't exist yet — test will verify it after implementation
12968
12969    // === get_series + unstack tests (Error 1) ===
12970    // Note: get_series<T>() doesn't exist yet — test will verify it after implementation
12971
12972    if (local_fail > 0) {
12973        std::cout << "  [FAIL] : in f_test_anal_i_query_bool_unstack() : " << local_fail << " checks failed" << std::endl;
12974        throw std::runtime_error("f_test_anal_i_query_bool_unstack failed");
12975    }
12976    std::cout << " -> tests passed" << std::endl;
12977}
12978
12979
12980// --- cpp_f_test_zanal_a_column_width.cpp ---
get_value_bool (pd_test_5_all.cpp:35197)
35187    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35188    pandas_tests::check(df["X"].get_value_double(0) == 1.0, "case_2.idx0_one", local_fail);
35189    pandas_tests::check(std::isnan(df["X"].get_value_double(1)),
35190                        "case_2.idx1_nan", local_fail);
35191    pandas_tests::check(df["X"].get_value_double(2) == 0.0, "case_2.idx2_zero", local_fail);
35192}
35193
35194void bool_nullable_826495_case_3_get_value_bool_mask_aware(int& local_fail) {
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
head (pd_test_1_all.cpp:6301)
6291        void pd_test_dataframe_indexing() {
6292            std::cout << "========= indexing (loc/iloc) ==============";
6293
6294            std::map<std::string, std::vector<numpy::float64>> data;
6295            data["A"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6296            data["B"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6297
6298            pandas::DataFrame df(data);
6299
6300            // Test head
6301            auto head_df = df.head(3);
6302            if (head_df.nrows() != 3) {
6303                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304                throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
iat (pd_test_1_all.cpp:22028)
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029            std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030            std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031            std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033            if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034                passed = false;
22035                error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036                            ", A[2]=" + a2 + ", A[3]=" + a3;
22037            }
idxmax (pd_test_1_all.cpp:23956)
23946        std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23947        return 0;
23948    }
23949
23950} // namespace dataframe_tests
23951// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23952
23953// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23954// dataframe_tests/pd_test_idxmax_idxmin.cpp
23955// Test for DataFrame.idxmax() and idxmin() methods
23956
23957#include <iostream>
23958#include <stdexcept>
23959#include <cmath>
23960#include <limits>
23961#include "../pandas/pd_dataframe.h"
23962
23963// CRITICAL: No using namespace directives
23964
23965namespace dataframe_tests {
idxmin (pd_test_1_all.cpp:23956)
23946        std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23947        return 0;
23948    }
23949
23950} // namespace dataframe_tests
23951// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23952
23953// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23954// dataframe_tests/pd_test_idxmax_idxmin.cpp
23955// Test for DataFrame.idxmax() and idxmin() methods
23956
23957#include <iostream>
23958#include <stdexcept>
23959#include <cmath>
23960#include <limits>
23961#include "../pandas/pd_dataframe.h"
23962
23963// CRITICAL: No using namespace directives
23964
23965namespace dataframe_tests {
iloc (pd_test_1_all.cpp:19149)
19139            pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141            // Positional indexing
19142            bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143            if (!passed) {
19144                std::cout << "  [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145                throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146            }
19147
19148            // iloc slice
19149            auto slice = s.iloc(1, 4);
19150            passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151            if (!passed) {
19152                std::cout << "  [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153                throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154            }
19155
19156            // iloc with indices
19157            auto selected = s.iloc({0, 2, 4});
19158            passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159            if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139            pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141            // Positional indexing
19142            bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143            if (!passed) {
19144                std::cout << "  [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145                throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146            }
19147
19148            // iloc slice
19149            auto slice = s.iloc(1, 4);
19150            passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151            if (!passed) {
19152                std::cout << "  [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153                throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154            }
19155
19156            // iloc with indices
19157            auto selected = s.iloc({0, 2, 4});
19158            passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159            if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139            pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141            // Positional indexing
19142            bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143            if (!passed) {
19144                std::cout << "  [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145                throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146            }
19147
19148            // iloc slice
19149            auto slice = s.iloc(1, 4);
19150            passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151            if (!passed) {
19152                std::cout << "  [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153                throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154            }
19155
19156            // iloc with indices
19157            auto selected = s.iloc({0, 2, 4});
19158            passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159            if (!passed) {
iloc_rows (pd_test_1_all.cpp:6315)
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312            }
6313
6314            // Test iloc_rows range
6315            auto slice = df.iloc_rows(1, 4);
6316            if (slice.nrows() != 3) {
6317                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
6319            }
6320
6321            // Test iloc_rows with indices
6322            auto selected = df.iloc_rows(std::vector<size_t>{0, 2, 4});
6323            if (selected.nrows() != 3) {
6324                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows vector nrows != 3" << std::endl;
6325                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows vector nrows != 3");
iloc_rows (pd_test_1_all.cpp:6315)
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312            }
6313
6314            // Test iloc_rows range
6315            auto slice = df.iloc_rows(1, 4);
6316            if (slice.nrows() != 3) {
6317                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
6319            }
6320
6321            // Test iloc_rows with indices
6322            auto selected = df.iloc_rows(std::vector<size_t>{0, 2, 4});
6323            if (selected.nrows() != 3) {
6324                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows vector nrows != 3" << std::endl;
6325                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows vector nrows != 3");
last (pd_test_1_all.cpp:11617)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
11627                std::cout << "  [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
last_valid_index (pd_test_1_all.cpp:20579)
20569            std::vector<double> values = {
20570                1.0,
20571                2.0,
20572                3.0,
20573                std::numeric_limits<double>::quiet_NaN(),
20574                std::numeric_limits<double>::quiet_NaN()
20575            };
20576            pandas::Series<double> s(values, "test");
20577
20578            auto last_idx = s.last_valid_index();
20579
20580            bool passed = last_idx.has_value() && last_idx.value() == 2;
20581
20582            if (!passed) {
20583                std::cout << "  [FAIL] : in pd_test_timeseries_last_valid_index() : expected index 2" << std::endl;
20584                throw std::runtime_error("pd_test_timeseries_last_valid_index failed");
20585            }
20586
20587            std::cout << " -> tests passed" << std::endl;
20588        }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
mask (pd_test_1_all.cpp:9119)
9109void pd_test_datetime_mixin_array_constructor() {
9110    std::cout << "========= DatetimeTDMixin array constructor =========================";
9111
9112    // Create DatetimeArray with some values
9113    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9114    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9115    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9116    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9117
9118    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9119    mask.setElementAt({0}, numpy::bool_(false));
9120    mask.setElementAt({1}, numpy::bool_(false));
9121    mask.setElementAt({2}, numpy::bool_(false));
9122
9123    pandas::DatetimeArray arr(data, mask);
9124    pandas::DatetimeTDMixin idx(arr, "timestamps");
9125
9126    bool passed = (idx.size() == 3 && !idx.empty() &&
9127                   idx.name().has_value() && *idx.name() == "timestamps" &&
9128                   idx.inferred_type() == "datetime");
nlargest (pd_test_1_all.cpp:6425)
6415            // Test sort_values descending
6416            auto sorted_desc = df.sort_values("A", false);
6417            first_val = sorted_desc["A"].get_value_str(0);
6418            if (std::stod(first_val) != 5.0) {
6419                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values desc first != 5" << std::endl;
6420                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values desc first != 5");
6421            }
6422
6423            // Test nlargest
6424            auto largest = df.nlargest(2, "A");
6425            if (largest.nrows() != 2) {
6426                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6427                throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6428            }
6429
6430            // Test nsmallest
6431            auto smallest = df.nsmallest(2, "A");
6432            if (smallest.nrows() != 2) {
6433                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6434                throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
nsmallest (pd_test_1_all.cpp:6432)
6422            }
6423
6424            // Test nlargest
6425            auto largest = df.nlargest(2, "A");
6426            if (largest.nrows() != 2) {
6427                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428                throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429            }
6430
6431            // Test nsmallest
6432            auto smallest = df.nsmallest(2, "A");
6433            if (smallest.nrows() != 2) {
6434                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435                throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
6436            }
6437
6438            std::cout << " -> tests passed" << std::endl;
6439        }
6440
6441        // =====================================================================
6442        // Test: Rank
query (pd_test_1_all.cpp:26418)
26408        std::cout << "====================================== [OK] pd_test_prod test suite ========================== " << std::endl;
26409        return 0;
26410    }
26411
26412} // namespace dataframe_tests
26413// ------------------- pd_test_prod.cpp (end) -----------------------------
26414
26415// ------------------- pd_test_query.cpp (start) -----------------------------
26416// dataframe_tests/pd_test_query.cpp
26417// Tests for DataFrame.query() method
26418
26419#include <iostream>
26420#include <stdexcept>
26421#include <map>
26422#include <vector>
26423#include <string>
26424#include "../pandas/pd_dataframe.h"
26425
26426namespace dataframe_tests {
26427    namespace dataframe_tests_query {
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
sample_frac (pd_test_5_all.cpp:2387)
2377    // Build stats with string index (asset tickers), then shuffle and sort_index
2378    pandas::DataFrame stats;
2379    stats.add_column<double>("Mean",   { 0.002212, 0.001136, 0.000029, 0.000988, -0.000533, 0.000408, 0.000692});
2380    stats.add_column<double>("Std",    { 0.017966, 0.016225, 0.020372, 0.021692,  0.025519, 0.026644, 0.014562});
2381    stats.add_column<double>("Sharpe", { 1.955,    1.111,    0.023,    0.723,    -0.331,    0.243,    0.755});
2382    stats.add_column<double>("Skew",   {-0.049545, 0.024990,-0.075349,-0.009316,  0.088344, 0.017202, 0.182708});
2383    stats.add_column<double>("Kurt",   {-0.053650,-0.085255,-0.098852, 0.069301,  0.370648,-0.038772, 0.079181});
2384    stats.set_index_from_list({"AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "JPM"});
2385
2386    // Shuffle: sample(frac=1, random_state=42) produces order: NVDA,AMZN,JPM,META,AAPL,MSFT,GOOGL
2387    auto shuf = stats.sample_frac(1.0, 42);
2388    auto si = shuf.sort_index();
2389
2390    std::string expected =
2391        "           Mean       Std  Sharpe      Skew      Kurt\n"
2392        "AAPL   0.002212  0.017966   1.955 -0.049545 -0.053650\n"
2393        "AMZN   0.000988  0.021692   0.723 -0.009316  0.069301\n"
2394        "GOOGL  0.000029  0.020372   0.023 -0.075349 -0.098852\n"
2395        "JPM    0.000692  0.014562   0.755  0.182708  0.079181\n"
2396        "META  -0.000533  0.025519  -0.331  0.088344  0.370648\n"
2397        "MSFT   0.001136  0.016225   1.111  0.024990 -0.085255\n"
set_value_nan (pd_test_5_all.cpp:18478)
18468        "0    a\n"
18469        "1  NaN\n"
18470        "2    c";
18471    bool ok = (actual == expected);
18472    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_10_str_col_where_default.to_string", local_fail);
18473    if (!ok) dump_diff("case_10", expected, actual);
18474}
18475
18476void where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN(int& local_fail) {
18477    pandas::Series<std::int64_t> s({10, 20, 30});
18478    s.set_value_nan(0);
18479
18480    std::string actual = s.get_value_str(0);
18481    std::string expected = "NaN";
18482    bool ok = (actual == expected);
18483    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN (got " +
18484          actual + ")", local_fail);
18485
18486    bool ok1 = (s.get_value_str(1) == "20");
18487    bool ok2 = (s.get_value_str(2) == "30");
18488    pandas_tests::check(ok1, "case_11.kept_idx1_eq_20", local_fail);
tail (pd_test_1_all.cpp:6308)
6298            pandas::DataFrame df(data);
6299
6300            // Test head
6301            auto head_df = df.head(3);
6302            if (head_df.nrows() != 3) {
6303                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304                throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312            }
6313
6314            // Test iloc_rows range
6315            auto slice = df.iloc_rows(1, 4);
6316            if (slice.nrows() != 3) {
6317                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897    std::cout << "========= inherited take ==============================";
5898
5899    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900    pandas::CategoricalIndex idx(arr);
5901
5902    std::vector<size_t> indices = {0, 2, 3};
5903    pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905    bool passed = (taken.size() == 3);
5906    if (!passed) {
5907        std::cout << "  [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908        throw std::runtime_error("pd_test_categorical_index_take failed");
5909    }
5910
5911    std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where_resolved (pd_test_5_all.cpp:91939)
91929    auto cond_df = make_2x3_cond_mixed();
91930
91931    pandas::WhereCondition cond;
91932    cond.kind = pandas::WhereCondKind::DATAFRAME;
91933    cond.dataframe_ptr = &cond_df;
91934
91935    pandas::WhereOther other;
91936    other.kind = pandas::WhereOtherKind::SCALAR;
91937    other.scalar_val = -999.0;
91938
91939    pandas::DataFrame result = df.where_resolved(cond, other, /*axis=*/0);
91940
91941    pandas_tests::check(result.nrows() == 2,
91942        "case_1_cond_df_other_scalar.nrows_eq_2", local_fail);
91943    pandas_tests::check(result.ncols() == 3,
91944        "case_1_cond_df_other_scalar.ncols_eq_3", local_fail);
91945    pandas_tests::check(!result.to_string().empty(),
91946        "case_1_cond_df_other_scalar.to_string_nonempty", local_fail);
91947}
91948
91949void case_2_cond_df_other_series() {
xs (pd_test_2_all.cpp:18668)
18658        std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659        return 0;
18660    }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs (pd_test_2_all.cpp:18668)
18658        std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659        return 0;
18660    }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs (pd_test_2_all.cpp:18668)
18658        std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18659        return 0;
18660    }
18661
18662} // namespace dataframe_tests
18663// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18664
18665// ------------------- pd_test_xs.cpp (start) -----------------------------
18666// dataframe_tests/pd_test_xs.cpp
18667// Tests for DataFrame xs() (cross-section) implementation
18668
18669#include <iostream>
18670#include <stdexcept>
18671#include <vector>
18672#include <string>
18673#include <map>
18674
18675#include "../pandas/pd_dataframe.h"
18676
18677// CRITICAL: No using namespace directives
xs_level (pd_test_5_all.cpp:12967)
12957        } catch (const std::exception&) {
12958            threw = true;
12959        }
12960        pandas_tests::check(!threw, "query_bool_and_numeric.no_throw", local_fail);
12961        if (!threw) {
12962            pandas_tests::check(result.nrows() == 1, "query_bool_and_numeric.nrows == 1 (got " + std::to_string(result.nrows()) + ")", local_fail);
12963        }
12964    }
12965
12966    // === xs_level tests (Error 2) ===
12967    // Note: xs_level() doesn't exist yet — test will verify it after implementation
12968
12969    // === get_series + unstack tests (Error 1) ===
12970    // Note: get_series<T>() doesn't exist yet — test will verify it after implementation
12971
12972    if (local_fail > 0) {
12973        std::cout << "  [FAIL] : in f_test_anal_i_query_bool_unstack() : " << local_fail << " checks failed" << std::endl;
12974        throw std::runtime_error("f_test_anal_i_query_bool_unstack failed");
12975    }
12976    std::cout << " -> tests passed" << std::endl;
12977}
assign (pd_test_1_all.cpp:6653)
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
6650                assign_data["A"] = {1, 2, 3};
6651                pandas::DataFrame df_assign(assign_data);
6652
6653                auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654                if (df2.ncols() != 2) {
6655                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657                }
6658                if (!df2.has_column("B")) {
6659                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661                }
6662            }
assign (pd_test_1_all.cpp:6653)
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
6650                assign_data["A"] = {1, 2, 3};
6651                pandas::DataFrame df_assign(assign_data);
6652
6653                auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654                if (df2.ncols() != 2) {
6655                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657                }
6658                if (!df2.has_column("B")) {
6659                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661                }
6662            }
assign (pd_test_1_all.cpp:6653)
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
6650                assign_data["A"] = {1, 2, 3};
6651                pandas::DataFrame df_assign(assign_data);
6652
6653                auto df2 = df_assign.assign<numpy::int64>("B", {10, 20, 30});
6654                if (df2.ncols() != 2) {
6655                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign ncols != 2" << std::endl;
6656                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign ncols");
6657                }
6658                if (!df2.has_column("B")) {
6659                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : assign didn't add column B" << std::endl;
6660                    throw std::runtime_error("pd_test_dataframe_manipulation failed: assign column B");
6661                }
6662            }
drop (pd_test_1_all.cpp:6558)
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551            }
6552            if (!popped) {
6553                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554                throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555            }
6556
6557            // Test drop columns
6558            auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559            if (dropped.ncols() != 1) {
6560                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561                throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562            }
6563
6564            // Test rename
6565            auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566            if (!renamed.has_column("X")) {
6567                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568                throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
6634                std::map<std::string, std::vector<numpy::int64>> dup_data;
6635                dup_data["A"] = {1, 1, 2, 2};
6636                dup_data["B"] = {1, 1, 2, 3};
6637                pandas::DataFrame df_dup(dup_data);
6638
6639                auto deduped = df_dup.drop_duplicates();
6640                // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641                if (deduped.nrows() != 3) {
6642                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
insert (pd_test_1_all.cpp:12028)
12018            }
12019
12020            std::cout << " -> tests passed" << std::endl;
12021        }
12022
12023        void pd_test_index_insert_delete() {
12024            std::cout << "========= insert and delete ===========================";
12025
12026            pandas::Index<numpy::int64> idx{1, 2, 4, 5};
12027
12028            auto inserted = idx.insert(2, 3);
12029            bool passed = (inserted.size() == 5);
12030            passed = passed && (inserted[2] == 3);
12031
12032            auto deleted = inserted.delete_(2);
12033            passed = passed && (deleted.size() == 4);
12034            passed = passed && deleted.equals(idx);
12035
12036            if (!passed) {
12037                std::cout << "  [FAIL] : in pd_test_index_insert_delete() : insert/delete failed" << std::endl;
12038                throw std::runtime_error("pd_test_index_insert_delete failed");
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex_like (pd_test_1_all.cpp:6777)
6767                data1["A"] = {1, 2};
6768                data1["B"] = {3, 4};
6769                pandas::DataFrame df1(data1);
6770
6771                std::map<std::string, std::vector<int>> data2;
6772                data2["B"] = {10, 20, 30};
6773                data2["C"] = {40, 50, 60};
6774                pandas::DataFrame df2(data2);
6775                df2 = df2.set_axis({"x", "y", "z"}, 0);
6776
6777                auto reindexed = df1.reindex_like(df2);
6778                if (reindexed.nrows() != 3 || reindexed.ncols() != 2) {
6779                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex_like wrong shape" << std::endl;
6780                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex_like");
6781                }
6782            }
6783
6784            std::cout << " -> tests passed" << std::endl;
6785        }
6786
6787        // =====================================================================
reindex_with_indexer (pd_test_5_all.cpp:40388)
40378    s.set_dtype_override("boolean");
40379    s.set_freq(std::optional<std::string>("D"));
40380    s.set_string_na_sentinel_disabled(true);
40381
40382    // Indexer: identity over the 3 source positions.
40383    numpy::NDArray<numpy::int64> indexer(std::vector<size_t>{3});
40384    indexer.setElementAt({0}, 0);
40385    indexer.setElementAt({1}, 1);
40386    indexer.setElementAt({2}, 2);
40387
40388    auto base = s.reindex_with_indexer(indexer);
40389    pandas_tests::check(base != nullptr, "case7.reindex_with_indexer_nonnull", local_fail);
40390    if (!base) return;
40391
40392    auto* r = dynamic_cast<pandas::Series<std::int64_t>*>(base.get());
40393    pandas_tests::check(r != nullptr, "case7.reindex_with_indexer_is_Series_int64",
40394                        local_fail);
40395    if (!r) return;
40396
40397    // dtype_override propagates (oracle says yes).
40398    pandas_tests::check(r->dtype_override().has_value() &&
reindex_with_spec (pd_test_5_all.cpp:54746)
54736}
54737
54738void case_25_b_bool_int_fill_explicit_spec(int& local_fail) {
54739    std::cout << "---- 25b bool_int_fill_explicit_spec\n";
54740
54741    pandas::DataFrame df;
54742    df.add_column<bool>("b", {true, false, true});
54743
54744    // Explicit-spec path: tag=Int is authoritative.
54745    pandas::FillSpec spec = pandas::FillSpec::integer(0);
54746    auto r = df.reindex_with_spec({"0", "1", "2", "3", "4"}, 0, spec);
54747
54748    std::string dt = r["b"].dtype_name();
54749    pandas_tests::check(dt == "object",
54750          "25b.b dtype should be object via reindex_with_spec "
54751          "(got '" + dt + "')", local_fail);
54752
54753    std::string rep = r.to_string();
54754    // Post-fix target: fill is literal int 0 (not False).
54755    size_t n_false = 0, pos = 0;
54756    while ((pos = rep.find("False", pos)) != std::string::npos) {
rename (pd_test_1_all.cpp:5815)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
rename (pd_test_1_all.cpp:5815)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
rename_axis (pd_test_1_all.cpp:6760)
6750                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751                }
6752            }
6753
6754            // Test rename_axis
6755            {
6756                std::map<std::string, std::vector<int>> data;
6757                data["A"] = {1, 2, 3};
6758                pandas::DataFrame df(data);
6759
6760                auto renamed = df.rename_axis("my_index", 0);
6761                // Should not throw
6762            }
6763
6764            // Test reindex_like
6765            {
6766                std::map<std::string, std::vector<int>> data1;
6767                data1["A"] = {1, 2};
6768                data1["B"] = {3, 4};
6769                pandas::DataFrame df1(data1);
rename_axis (pd_test_1_all.cpp:6760)
6750                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751                }
6752            }
6753
6754            // Test rename_axis
6755            {
6756                std::map<std::string, std::vector<int>> data;
6757                data["A"] = {1, 2, 3};
6758                pandas::DataFrame df(data);
6759
6760                auto renamed = df.rename_axis("my_index", 0);
6761                // Should not throw
6762            }
6763
6764            // Test reindex_like
6765            {
6766                std::map<std::string, std::vector<int>> data1;
6767                data1["A"] = {1, 2};
6768                data1["B"] = {3, 4};
6769                pandas::DataFrame df1(data1);
rename_columns (pd_test_1_all.cpp:6565)
6555            }
6556
6557            // Test drop columns
6558            auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559            if (dropped.ncols() != 1) {
6560                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561                throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562            }
6563
6564            // Test rename
6565            auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566            if (!renamed.has_column("X")) {
6567                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568                throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
6569            }
6570
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
rename_result (pd_test_5_all.cpp:50613)
50603    return df;
50604}
50605
50606void case_1_dict_axis1(int& local_fail) {
50607    std::cout << "-- case_1_dict_axis1\n";
50608    auto df = make_flat_frame();
50609    pandas::RenameSpec spec;
50610    spec.kind = pandas::RenameSpec::Kind::Dict;
50611    spec.dict_mapper["a"] = "A";
50612    spec.dict_mapper["b"] = "B";
50613    auto r = df.rename_result(spec, /*axis=*/1, /*inplace=*/false, /*errors_raise=*/false);
50614    pandas_tests::check(r.is_frame(), "case_1.is_frame", local_fail);
50615    if (!r.is_frame()) return;
50616    auto& f = *std::get<std::unique_ptr<pandas::DataFrame>>(r.value);
50617    pandas_tests::check(f.columns().get_value_str(0) == "A", "case_1.col0_A", local_fail);
50618    pandas_tests::check(f.columns().get_value_str(1) == "B", "case_1.col1_B", local_fail);
50619}
50620
50621void case_2_dict_axis0(int& local_fail) {
50622    std::cout << "-- case_2_dict_axis0\n";
50623    auto df = make_flat_frame();
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
reset_index (pd_test_3_all.cpp:1618)
1608    }
1609
1610    std::cout << " -> tests passed" << std::endl;
1611}
1612
1613// ============================================================================
1614// Category 10: Remaining Untested Functions
1615// ============================================================================
1616
1617void pd_test_3_all_series_reset_index() {
1618    std::cout << "========= Series.reset_index() =======================";
1619
1620    std::vector<double> vals = {10.0, 20.0, 30.0};
1621    pandas::Series<double> s(vals, "test");
1622
1623    // Set a custom index
1624    pandas::Index<std::string> custom_idx({"a", "b", "c"});
1625    s.set_index(custom_idx);
1626
1627    // Reset the index
1628    s.reset_index(true);  // drop=true
set_axis (pd_test_1_all.cpp:6673)
6663            std::cout << " -> tests passed" << std::endl;
6664        }
6665
6666        // =====================================================================
6667        // Test: Index Operations
6668        // =====================================================================
6669        void pd_test_dataframe_index_ops() {
6670            std::cout << "========= index operations =================";
6671
6672            // Test set_axis (rows)
6673            {
6674                std::map<std::string, std::vector<int>> data;
6675                data["A"] = {1, 2, 3};
6676                pandas::DataFrame df(data);
6677
6678                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6679                std::string idx0 = renamed.index().get_value_str(0);
6680                if (idx0 != "x") {
6681                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6682                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20317)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index_col (pd_test_3_all.cpp:828)
818    std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820    // Case A: 2-element brace-init (the C2668 trigger)
821    {
822        pandas::DataFrame df;
823        df.add_column<std::string>("City",   {"London", "London", "NYC", "NYC"});
824        df.add_column<std::string>("Name",   {"Alice",  "Bob",    "Carol","Dave"});
825        df.add_column<std::int64_t>("Age",   {28, 35, 30, 25});
826        df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828        auto mi_df = df.set_index_col({"City", "Name"});
829
830        if (mi_df.ncols() != 2) {
831            throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832        }
833        if (mi_df.nrows() != 4) {
834            throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835        }
836    }
837
838    // Case B: single-element brace-init
set_index_col (pd_test_3_all.cpp:828)
818    std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820    // Case A: 2-element brace-init (the C2668 trigger)
821    {
822        pandas::DataFrame df;
823        df.add_column<std::string>("City",   {"London", "London", "NYC", "NYC"});
824        df.add_column<std::string>("Name",   {"Alice",  "Bob",    "Carol","Dave"});
825        df.add_column<std::int64_t>("Age",   {28, 35, 30, 25});
826        df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828        auto mi_df = df.set_index_col({"City", "Name"});
829
830        if (mi_df.ncols() != 2) {
831            throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832        }
833        if (mi_df.nrows() != 4) {
834            throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835        }
836    }
837
838    // Case B: single-element brace-init
set_index_col (pd_test_3_all.cpp:828)
818    std::cout << "========= DataFrame.set_index_col brace-init overload =====";
819
820    // Case A: 2-element brace-init (the C2668 trigger)
821    {
822        pandas::DataFrame df;
823        df.add_column<std::string>("City",   {"London", "London", "NYC", "NYC"});
824        df.add_column<std::string>("Name",   {"Alice",  "Bob",    "Carol","Dave"});
825        df.add_column<std::int64_t>("Age",   {28, 35, 30, 25});
826        df.add_column<std::int64_t>("Salary",{50000, 60000, 55000, 45000});
827
828        auto mi_df = df.set_index_col({"City", "Name"});
829
830        if (mi_df.ncols() != 2) {
831            throw std::runtime_error("set_index_col brace-init: ncols mismatch");
832        }
833        if (mi_df.nrows() != 4) {
834            throw std::runtime_error("set_index_col brace-init: nrows mismatch");
835        }
836    }
837
838    // Case B: single-element brace-init
set_index_from_column (pd_test_3_all.cpp:1343)
1333        df.add_column<int64_t>("v_num", {1, 2, 3, 4});
1334        df.set_nan_marker("v_str", {true, false, false, true});
1335        // exact mask-bit assertions depend on Series<string> mask API
1336    }
1337
1338    // I. set_index_from_column<T>
1339    {
1340        pandas::DataFrame df;
1341        df.add_column<int64_t>("val", {1, 3});
1342        df.add_column<int64_t>("count", {30, 70});
1343        df.set_index_from_column<std::string>("group", {"A", "B"});
1344        if (!df.index_name().has_value() || df.index_name().value() != "group") {
1345            throw std::runtime_error("set_index_from_column: name not set");
1346        }
1347    }
1348
1349    std::cout << " -> tests passed" << std::endl;
1350}
1351
1352// ============================================================================
1353// Category 8: DateTime Functions
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_list (pd_test_3_all.cpp:1236)
1226}
1227
1228void pd_test_3_all_dataframe_pandas_facade_methods() {
1229    std::cout << "========= DataFrame pandas-style facade methods =======";
1230
1231    // A. set_index_from_list -- string form
1232    {
1233        pandas::DataFrame df;
1234        df.add_column<int64_t>("one", {1, 2, 3, 4});
1235        df.add_column<int64_t>("two", {18, 20, 20, 18});
1236        df.set_index_from_list({"a", "b", "c", "d"});
1237        if (df.nrows() != 4) {
1238            throw std::runtime_error("set_index_from_list(string): nrows changed");
1239        }
1240        if (df.index().get_value_str(0) != "a" || df.index().get_value_str(3) != "d") {
1241            throw std::runtime_error("set_index_from_list(string): values wrong");
1242        }
1243    }
1244
1245    // B. set_index_from_list -- int form (reproduces test 1184)
1246    {
set_index_from_strings (pd_test_3_all.cpp:1283)
1274        pandas::DataFrame df2;
1275        df2.add_column<int64_t>("val", {3, 4});
1276        df2.set_integer_index({2, 3});
1277        if (df2.index().size() != 2) {
1278            throw std::runtime_error("set_integer_index: size wrong");
1279        }
1280
1281        pandas::DataFrame df3;
1282        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1283        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1284        if (df3.index().get_value_str(4) != "b") {
1285            throw std::runtime_error("set_index_from_strings: values wrong");
1286        }
1287    }
1288
1289    // E. set_index_name + index_name() getter/setter
1290    {
1291        pandas::DataFrame df;
1292        df.add_column<int64_t>("val", {1, 2, 3});
1293        df.set_index_from_list({"r0", "r1", "r2"});
set_index_from_strings (pd_test_3_all.cpp:1283)
1274        pandas::DataFrame df2;
1275        df2.add_column<int64_t>("val", {3, 4});
1276        df2.set_integer_index({2, 3});
1277        if (df2.index().size() != 2) {
1278            throw std::runtime_error("set_integer_index: size wrong");
1279        }
1280
1281        pandas::DataFrame df3;
1282        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1283        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1284        if (df3.index().get_value_str(4) != "b") {
1285            throw std::runtime_error("set_index_from_strings: values wrong");
1286        }
1287    }
1288
1289    // E. set_index_name + index_name() getter/setter
1290    {
1291        pandas::DataFrame df;
1292        df.add_column<int64_t>("val", {1, 2, 3});
1293        df.set_index_from_list({"r0", "r1", "r2"});
set_index_name (pd_test_2_all.cpp:20841)
20832void test_sgb_apply_result_index_categorical() {
20833    std::cout << "  -- test_sgb_apply_result_index_categorical --" << std::endl;
20834
20835    std::vector<numpy::float64> values = {5.0, 10.0};
20836    pandas::Series<std::string> by({"A", "B"});
20837    pandas::Series<numpy::float64> data(values);
20838
20839    auto sgb = data.groupby(by);
20840    sgb.set_categorical_categories({"A", "B", "C"});
20841    sgb.set_index_name("cat_key");
20842
20843    pandas::Series<numpy::float64> result(values);
20844    std::vector<std::string> idx_labels = {"A", "B"};
20845    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20846
20847    sgb.apply_result_index(result);
20848
20849    // Should have CategoricalIndex (dtype_name() returns "category")
20850    check(result.index().dtype_name() == "category", "is_categorical_index");
20851}
set_index_name (pd_test_2_all.cpp:20841)
20832void test_sgb_apply_result_index_categorical() {
20833    std::cout << "  -- test_sgb_apply_result_index_categorical --" << std::endl;
20834
20835    std::vector<numpy::float64> values = {5.0, 10.0};
20836    pandas::Series<std::string> by({"A", "B"});
20837    pandas::Series<numpy::float64> data(values);
20838
20839    auto sgb = data.groupby(by);
20840    sgb.set_categorical_categories({"A", "B", "C"});
20841    sgb.set_index_name("cat_key");
20842
20843    pandas::Series<numpy::float64> result(values);
20844    std::vector<std::string> idx_labels = {"A", "B"};
20845    result.set_index(std::make_unique<pandas::Index<std::string>>(idx_labels));
20846
20847    sgb.apply_result_index(result);
20848
20849    // Should have CategoricalIndex (dtype_name() returns "category")
20850    check(result.index().dtype_name() == "category", "is_categorical_index");
20851}
swaplevel (pd_test_1_all.cpp:14461)
14451        void pd_test_multiindex_swaplevel() {
14452            std::cout << "========= swaplevel =================================== ";
14453
14454            std::vector<std::vector<std::string>> arrays = {
14455                {"a", "b"},
14456                {"x", "y"}
14457            };
14458            std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461            pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463            bool passed = true;
14464
14465            // Tuple should be reversed
14466            auto tup = swapped[0];
14467            if (tup[0] != "x" || tup[1] != "a") {
14468                std::cout << "  [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469                passed = false;
14470            }
swaplevel (pd_test_1_all.cpp:14461)
14451        void pd_test_multiindex_swaplevel() {
14452            std::cout << "========= swaplevel =================================== ";
14453
14454            std::vector<std::vector<std::string>> arrays = {
14455                {"a", "b"},
14456                {"x", "y"}
14457            };
14458            std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461            pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463            bool passed = true;
14464
14465            // Tuple should be reversed
14466            auto tup = swapped[0];
14467            if (tup[0] != "x" || tup[1] != "a") {
14468                std::cout << "  [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469                passed = false;
14470            }
swaplevel (pd_test_1_all.cpp:14461)
14451        void pd_test_multiindex_swaplevel() {
14452            std::cout << "========= swaplevel =================================== ";
14453
14454            std::vector<std::vector<std::string>> arrays = {
14455                {"a", "b"},
14456                {"x", "y"}
14457            };
14458            std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461            pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463            bool passed = true;
14464
14465            // Tuple should be reversed
14466            auto tup = swapped[0];
14467            if (tup[0] != "x" || tup[1] != "a") {
14468                std::cout << "  [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469                passed = false;
14470            }
update (pd_test_1_all.cpp:13945)
13935            if (!result.has_column("C")) {
13936                passed = false;
13937                std::cout << "  [FAIL] : in pd_test_joining_combine_first() : missing column C" << std::endl;
13938                throw std::runtime_error("pd_test_joining_combine_first failed: col C missing");
13939            }
13940
13941            std::cout << " -> tests passed" << std::endl;
13942        }
13943
13944        // =====================================================================
13945        // update() Tests
13946        // =====================================================================
13947
13948        void pd_test_joining_update() {
13949            std::cout << "========= update ======================================";
13950
13951            std::map<std::string, std::vector<double>> left_data = {
13952                {"A", {1.0, 2.0, 3.0}},
13953                {"B", {10.0, 20.0, 30.0}}
13954            };
13955            std::vector<std::string> left_idx = {"x", "y", "z"};
backfill (pd_test_3_all.cpp:2645)
2635void pd_test_3_all_df_backfill_pad() {
2636    std::cout << "========= DataFrame.backfill/pad() =======================";
2637
2638    std::map<std::string, std::vector<double>> data = {
2639        {"A", {1.0, std::nan(""), std::nan(""), 4.0}},
2640        {"B", {std::nan(""), 2.0, std::nan(""), 4.0}}
2641    };
2642    pandas::DataFrame df(data);
2643
2644    // Test backfill (should fill backward)
2645    pandas::DataFrame bfill_result = df.backfill(0);
2646    if (bfill_result.nrows() != 4 || bfill_result.ncols() != 2) {
2647        throw std::runtime_error("backfill shape failed");
2648    }
2649
2650    // Test pad (should fill forward)
2651    pandas::DataFrame pad_result = df.pad(0);
2652    if (pad_result.nrows() != 4 || pad_result.ncols() != 2) {
2653        throw std::runtime_error("pad shape failed");
2654    }
bfill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
interpolate (pd_test_1_all.cpp:24365)
24355        std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356        return 0;
24357    }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isna_frame (pd_test_3_all.cpp:10770)
10760        if (dtype_name.find("int") == std::string::npos) {
10761            std::cout << "  [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 3 dtype" << std::endl;
10762            throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 3 dtype");
10763        }
10764        if (r[static_cast<size_t>(0)] != 50) {
10765            std::cout << "  [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 3 value" << std::endl;
10766            throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 3 value");
10767        }
10768    }
10769
10770    // --- Case 4: isna_frame().to_string() ---
10771    {
10772        pandas::DataFrame df;
10773        df.add_column<double>("A", {1.0, std::nan(""), 3.0});
10774        df.add_column<double>("B", {std::nan(""), 2.0, std::nan("")});
10775        std::string s = df.isna_frame().to_string();
10776        if (s.empty()) {
10777            std::cout << "  [FAIL] : in pd_test_3_all_niche_residual_fixes() : Case 4 empty" << std::endl;
10778            throw std::runtime_error("pd_test_3_all_niche_residual_fixes failed: Case 4 empty");
10779        }
10780        // Also verify existing NDArray-returning isna() still compiles
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
676    // Index 0: 1.0 -> not null
677    if (isnull_result.getElementAt({0})) {
678        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679        throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680    }
681    // Index 1: NaN -> null
isnull_frame (pd_test_5_all.cpp:34434)
34424    pandas_tests::check(row_mi_ok, "case2.notna_preserves_row_multiindex", local_fail);
34425    pandas_tests::check(lvl_vals_ok, "case2.notna_preserves_level_values", local_fail);
34426}
34427
34428void isnamultiindex_629108_case_3_isnull_alias_row_mi_only(int& local_fail) {
34429    const std::string tag = "[case3]";
34430    std::cout << "\n" << tag << " === isnull_frame alias: row MI only ===\n";
34431    bool row_mi_ok = false;
34432    try {
34433        auto df = mk_row_mi_only();
34434        auto out = df.isnull_frame();
34435        dump_df_mi_state(tag, "isnull", out);
34436        row_mi_ok = out.has_multiindex();
34437    } catch (const std::exception& e) {
34438        std::cout << tag << " exception: " << e.what() << "\n";
34439    }
34440    pandas_tests::check(row_mi_ok, "case3.isnull_preserves_row_multiindex", local_fail);
34441}
34442
34443void isnamultiindex_629108_case_4_notnull_alias_row_mi_only(int& local_fail) {
34444    const std::string tag = "[case4]";
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
notna_frame (pd_test_5_all.cpp:34411)
34401    pandas_tests::check(lvl_vals_ok, "case1.isna_preserves_level_values", local_fail);
34402}
34403
34404void isnamultiindex_629108_case_2_notna_row_mi_only(int& local_fail) {
34405    const std::string tag = "[case2]";
34406    std::cout << "\n" << tag << " === notna_frame: row MI only ===\n";
34407    bool row_mi_ok = false;
34408    bool lvl_vals_ok = false;
34409    try {
34410        auto df = mk_row_mi_only();
34411        auto out = df.notna_frame();
34412        dump_df_mi_state(tag, "src",   df);
34413        dump_df_mi_state(tag, "notna", out);
34414        row_mi_ok = out.has_multiindex();
34415        if (row_mi_ok) {
34416            const auto& lv0 = out.multiindex().get_level_values_str(0);
34417            const auto& lv1 = out.multiindex().get_level_values_str(1);
34418            lvl_vals_ok = (lv0.size() == 3 && lv0[0] == "A" && lv0[1] == "A" && lv0[2] == "A"
34419                           && lv1.size() == 3 && lv1[0] == "B" && lv1[1] == "C" && lv1[2] == "D");
34420        }
34421    } catch (const std::exception& e) {
notnull (pd_test_3_all.cpp:665)
655    }
656
657    std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
notnull_frame (pd_test_5_all.cpp:34449)
34439    }
34440    pandas_tests::check(row_mi_ok, "case3.isnull_preserves_row_multiindex", local_fail);
34441}
34442
34443void isnamultiindex_629108_case_4_notnull_alias_row_mi_only(int& local_fail) {
34444    const std::string tag = "[case4]";
34445    std::cout << "\n" << tag << " === notnull_frame alias: row MI only ===\n";
34446    bool row_mi_ok = false;
34447    try {
34448        auto df = mk_row_mi_only();
34449        auto out = df.notnull_frame();
34450        dump_df_mi_state(tag, "notnull", out);
34451        row_mi_ok = out.has_multiindex();
34452    } catch (const std::exception& e) {
34453        std::cout << tag << " exception: " << e.what() << "\n";
34454    }
34455    pandas_tests::check(row_mi_ok, "case4.notnull_preserves_row_multiindex", local_fail);
34456}
34457
34458void isnamultiindex_629108_case_5_isna_row_mi_and_col_mi(int& local_fail) {
34459    const std::string tag = "[case5]";
pad (pd_test_3_all.cpp:1771)
1761    if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762        std::cout << "  [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763        throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764    }
1765
1766    std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770    std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772    // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773    // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774    // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776    std::map<std::string, std::vector<double>> data = {
1777        {"A", {1.0, 2.0, 3.0}},
1778        {"B", {4.0, 5.0, 6.0}}
1779    };
1780    pandas::DataFrame df(data);
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
count_cols (pd_test_1_all.cpp:6386)
6376            }
6377
6378            // Test max
6379            auto maxs = df.max_cols();
6380            if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382                throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
6383            }
6384
6385            // Test count
6386            auto counts = df.count_cols();
6387            if (counts[0] != 5) {
6388                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : count A != 5" << std::endl;
6389                throw std::runtime_error("pd_test_dataframe_aggregations failed: count A != 5");
6390            }
6391
6392            std::cout << " -> tests passed" << std::endl;
6393        }
6394
6395        // =====================================================================
6396        // Test: Sorting
cummax (pd_test_1_all.cpp:5152)
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
5154            passed = std::abs(std::stod(val) - 3.0) < 0.001;
5155            if (!passed) {
5156                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummax failed" << std::endl;
5157                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummax failed");
5158            }
5159
5160            std::cout << " -> tests passed" << std::endl;
5161        }
cummin (pd_test_1_all.cpp:5143)
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
cumprod (pd_test_1_all.cpp:5134)
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
cumsum (pd_test_1_all.cpp:5125)
5115        }
5116
5117        void pd_test_arithmetic_dataframe_cumulative() {
5118            std::cout << "========= DataFrame cumulative ==================";
5119
5120            std::map<std::string, std::vector<double>> data;
5121            data["A"] = {1.0, 2.0, 3.0, 4.0};
5122            pandas::DataFrame df(data);
5123
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
describe (pd_test_2_all.cpp:19793)
19783        ++g_fail;
19784    }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788    if (std::isnan(a) && std::isnan(b)) return true;
19789    return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797    std::cout << "  -- pd_test_describe_numeric_only --" << std::endl;
19798
19799    pandas::DataFrame df;
19800    df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801    df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802    df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
describe_full (pd_test_2_all.cpp:19758)
19748              << (dataframe_tests_broadcasting::g_fail == 0 ? "OK" : "FAIL")
19749              << "] pd_test_broadcasting test suite ==========================" << std::endl;
19750
19751    return dataframe_tests_broadcasting::g_fail;
19752}
19753
19754} // namespace dataframe_tests
19755// ------------------- pd_test_broadcasting.cpp (end) -----------------------------
19756
19757// ------------------- pd_test_describe.cpp (start) -----------------------------
19758// pd_test_describe.cpp - Tests for describe_full() migration
19759// StringColumnStats, compute_string_column_stats(), describe_full() modes
19760
19761#include <iostream>
19762#include <string>
19763#include <vector>
19764#include <cmath>
19765
19766#include "../pandas/pd_dataframe.h"
19767#include "../pandas/pd_series.h"
19768#include "../pandas/pd_index.h"
kurt (pd_test_1_all.cpp:4599)
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604            }
4605
4606            // Test kurtosis alias
4607            auto kurt_alias = s.kurtosis();
4608            passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609            if (!passed) {
kurt_cols (pd_test_1_all.cpp:4786)
4776                throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew_cols should return 1 value");
4777            }
4778
4779            // Skew should be positive for right-skewed data
4780            passed = skew[0] > 0;
4781            if (!passed) {
4782                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew should be positive" << std::endl;
4783                throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew should be positive");
4784            }
4785
4786            auto kurt = df.kurt_cols();
4787            passed = kurt.size() == 1 && !std::isnan(kurt[0]);
4788            if (!passed) {
4789                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : kurt_cols should return valid value" << std::endl;
4790                throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: kurt_cols should return valid value");
4791            }
4792
4793            std::cout << " -> tests passed" << std::endl;
4794        }
4795
4796    } // namespace dataframe_tests_aggregation
kurtosis (pd_test_1_all.cpp:4607)
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604            }
4605
4606            // Test kurtosis alias
4607            auto kurt_alias = s.kurtosis();
4608            passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609            if (!passed) {
4610                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurtosis alias failed" << std::endl;
4611                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612            }
4613
4614            std::cout << " -> tests passed" << std::endl;
4615        }
4616
4617        void pd_test_aggregation_series_pct_change() {
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
max_cols (pd_test_1_all.cpp:6379)
6369            }
6370
6371            // Test min
6372            auto mins = df.min_cols();
6373            if (std::abs(mins[0] - 1.0) > 1e-10) {
6374                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
6375                throw std::runtime_error("pd_test_dataframe_aggregations failed: min A != 1");
6376            }
6377
6378            // Test max
6379            auto maxs = df.max_cols();
6380            if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382                throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
6383            }
6384
6385            // Test count
6386            auto counts = df.count_cols();
6387            if (counts[0] != 5) {
6388                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : count A != 5" << std::endl;
6389                throw std::runtime_error("pd_test_dataframe_aggregations failed: count A != 5");
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
mean_cols (pd_test_1_all.cpp:6364)
6354            if (std::abs(sum_a - 15.0) > 1e-10) {
6355                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : sum A != 15" << std::endl;
6356                throw std::runtime_error("pd_test_dataframe_aggregations failed: sum A != 15");
6357            }
6358            if (std::abs(sum_b - 150.0) > 1e-10) {
6359                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : sum B != 150" << std::endl;
6360                throw std::runtime_error("pd_test_dataframe_aggregations failed: sum B != 150");
6361            }
6362
6363            // Test mean
6364            auto means = df.mean_cols();
6365            double mean_a = means[0];
6366            if (std::abs(mean_a - 3.0) > 1e-10) {
6367                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : mean A != 3" << std::endl;
6368                throw std::runtime_error("pd_test_dataframe_aggregations failed: mean A != 3");
6369            }
6370
6371            // Test min
6372            auto mins = df.min_cols();
6373            if (std::abs(mins[0] - 1.0) > 1e-10) {
6374                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
median_cols (pd_test_1_all.cpp:24860)
24850        std::cout << "====================================== [OK] pd_test_interpolate test suite ========================== " << std::endl;
24851        return 0;
24852    }
24853
24854} // namespace dataframe_tests
24855// ------------------- pd_test_interpolate.cpp (end) -----------------------------
24856
24857// ------------------- pd_test_median.cpp (start) -----------------------------
24858// dataframe_tests/pd_test_median.cpp
24859// Tests for DataFrame.median() and DataFrame.median_cols() methods
24860
24861#include <iostream>
24862#include <stdexcept>
24863#include <cmath>
24864#include <limits>
24865#include "../pandas/pd_dataframe.h"
24866
24867// CRITICAL: No using namespace directives
24868
24869namespace dataframe_tests {
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
min_cols (pd_test_1_all.cpp:6372)
6363            // Test mean
6364            auto means = df.mean_cols();
6365            double mean_a = means[0];
6366            if (std::abs(mean_a - 3.0) > 1e-10) {
6367                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : mean A != 3" << std::endl;
6368                throw std::runtime_error("pd_test_dataframe_aggregations failed: mean A != 3");
6369            }
6370
6371            // Test min
6372            auto mins = df.min_cols();
6373            if (std::abs(mins[0] - 1.0) > 1e-10) {
6374                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : min A != 1" << std::endl;
6375                throw std::runtime_error("pd_test_dataframe_aggregations failed: min A != 1");
6376            }
6377
6378            // Test max
6379            auto maxs = df.max_cols();
6380            if (std::abs(maxs[0] - 5.0) > 1e-10) {
6381                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : max A != 5" << std::endl;
6382                throw std::runtime_error("pd_test_dataframe_aggregations failed: max A != 5");
mode (pd_test_1_all.cpp:4569)
4559                throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(1) should be 5.0");
4560            }
4561
4562            std::cout << " -> tests passed" << std::endl;
4563        }
4564
4565        void pd_test_aggregation_series_mode() {
4566            std::cout << "========= Series mode ===========================";
4567
4568            pandas::Series<int> s({1, 2, 2, 3, 3, 3});
4569            auto m = s.mode();
4570            bool passed = m.size() == 1 && m[0] == 3;
4571            if (!passed) {
4572                std::cout << "  [FAIL] : in pd_test_aggregation_series_mode() : mode should be 3" << std::endl;
4573                throw std::runtime_error("pd_test_aggregation_series_mode failed: mode should be 3");
4574            }
4575
4576            // Test multi-mode
4577            pandas::Series<int> s2({1, 1, 2, 2});
4578            auto m2 = s2.mode();
4579            passed = m2.size() == 2;  // Both 1 and 2 are modes
nunique (pd_test_1_all.cpp:10604)
10594    std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598    std::cout << "========= nunique =========================";
10599
10600    pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601    pandas::CategoricalIndex idx(arr);
10602
10603    bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604    if (!passed) {
10605        std::cout << "  [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606        throw std::runtime_error("pd_test_extension_index_nunique failed");
10607    }
10608
10609    std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613    std::cout << "========= factorize =========================";
nunique_cols (pd_test_1_all.cpp:25375)
25365        std::cout << "====================================== [OK] pd_test_mode test suite ========================== " << std::endl;
25366        return 0;
25367    }
25368
25369} // namespace dataframe_tests
25370// ------------------- pd_test_mode.cpp (end) -----------------------------
25371
25372// ------------------- pd_test_nunique.cpp (start) -----------------------------
25373// dataframe_tests/pd_test_nunique.cpp
25374// Tests for DataFrame.nunique() and DataFrame.nunique_cols() methods
25375
25376#include <iostream>
25377#include <stdexcept>
25378#include <cmath>
25379#include <limits>
25380#include "../pandas/pd_dataframe.h"
25381
25382// CRITICAL: No using namespace directives
25383
25384namespace dataframe_tests {
prod (pd_test_1_all.cpp:26082)
26072        std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073        return 0;
26074    }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
prod_cols (pd_test_1_all.cpp:26082)
26072        std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073        return 0;
26074    }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
product (pd_test_3_all.cpp:2584)
2574    // Test quantile along rows
2575    pandas::Series<numpy::float64> q50_rows = df.quantile(0.5, 1);
2576    if (q50_rows.size() != 5) {
2577        throw std::runtime_error("quantile(0.5, axis=1) failed");
2578    }
2579
2580    std::cout << " -> tests passed" << std::endl;
2581}
2582
2583void pd_test_3_all_df_product() {
2584    std::cout << "========= DataFrame.product(axis) ========================";
2585
2586    std::map<std::string, std::vector<double>> data = {
2587        {"A", {1.0, 2.0, 3.0}},
2588        {"B", {4.0, 5.0, 6.0}}
2589    };
2590    pandas::DataFrame df(data);
2591
2592    // Test product along columns
2593    pandas::Series<numpy::float64> prod_cols = df.product(0);
2594    if (prod_cols.size() != 2 || std::abs(prod_cols[static_cast<size_t>(0)] - 6.0) > 0.001 ||
quantile (pd_test_1_all.cpp:4540)
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
4535
4536        void pd_test_aggregation_series_quantile() {
4537            std::cout << "========= Series quantile =======================";
4538
4539            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540            auto q50 = s.quantile(0.5);
4541            bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542            if (!passed) {
4543                std::cout << "  [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544                throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545            }
4546
4547            // Test q=0 and q=1
4548            auto q0 = s.quantile(0.0);
4549            passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550            if (!passed) {
quantile_cols (pd_test_1_all.cpp:4753)
4743        }
4744
4745        void pd_test_aggregation_dataframe_quantile_cols() {
4746            std::cout << "========= DataFrame quantile_cols ===============";
4747
4748            std::map<std::string, std::vector<double>> data;
4749            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4750            data["B"] = {10.0, 20.0, 30.0, 40.0, 50.0};
4751            pandas::DataFrame df(data);
4752
4753            auto q50 = df.quantile_cols(0.5);
4754
4755            // Check A median = 3.0
4756            bool passed = std::abs(q50[0] - 3.0) < 0.001 || std::abs(q50[1] - 3.0) < 0.001;
4757            if (!passed) {
4758                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_quantile_cols() : A median should be 3.0" << std::endl;
4759                throw std::runtime_error("pd_test_aggregation_dataframe_quantile_cols failed: A median should be 3.0");
4760            }
4761
4762            std::cout << " -> tests passed" << std::endl;
4763        }
sem (pd_test_1_all.cpp:4525)
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
skew (pd_test_1_all.cpp:4592)
4582                throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583            }
4584
4585            std::cout << " -> tests passed" << std::endl;
4586        }
4587
4588        void pd_test_aggregation_series_skew_kurt() {
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
skew_cols (pd_test_1_all.cpp:4772)
4762            std::cout << " -> tests passed" << std::endl;
4763        }
4764
4765        void pd_test_aggregation_dataframe_skew_kurt_cols() {
4766            std::cout << "========= DataFrame skew/kurt_cols ==============";
4767
4768            std::map<std::string, std::vector<double>> data;
4769            data["A"] = {1.0, 2.0, 2.0, 3.0, 9.0};
4770            pandas::DataFrame df(data);
4771
4772            auto skew = df.skew_cols();
4773            bool passed = skew.size() == 1;
4774            if (!passed) {
4775                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew_cols should return 1 value" << std::endl;
4776                throw std::runtime_error("pd_test_aggregation_dataframe_skew_kurt_cols failed: skew_cols should return 1 value");
4777            }
4778
4779            // Skew should be positive for right-skewed data
4780            passed = skew[0] > 0;
4781            if (!passed) {
4782                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_skew_kurt_cols() : skew should be positive" << std::endl;
std (pd_test_1_all.cpp:4526)
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
4535
4536        void pd_test_aggregation_series_quantile() {
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
std_cols (pd_test_plan_coverage_gaps.cpp:130)
120    if (local_fail > 0) throw std::runtime_error("pd_test_cut_retbins_front_equals_min_val_right_false failed");
121    std::cout << " -> tests passed" << std::endl;
122}
123
124// -------------------------------------------------------------------------
125// plan_float_reduction_precision — DataFrame axis=0 reducers coverage (R10 gap)
126//
127// The `_3` suite only exercises Series<double>::sum/mean/var/std. The plan's
128// R10 Files-to-Update adds classify_column_dtypes (DataFrame::sum(axis=0)),
129// mean_cols() (DataFrame::mean(axis=0), describe), and std_cols()
130// (DataFrame::std/var(axis=0), describe). None are touched by _3 assertions.
131// -------------------------------------------------------------------------
132void pd_test_dataframe_sum_axis0_small() {
133    std::cout << "========= DataFrame::sum(axis=0) small-data sanity =======";
134    int local_fail = 0;
135
136    std::map<std::string, std::vector<double>> data = {
137        {"A", {1.0, 2.0, 3.0, 4.0}},
138        {"B", {10.0, 20.0, 30.0, 40.0}}
139    };
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
sum_cols (pd_test_1_all.cpp:6351)
6341        void pd_test_dataframe_aggregations() {
6342            std::cout << "========= aggregations =====================";
6343
6344            std::map<std::string, std::vector<numpy::float64>> data;
6345            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6346            data["B"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6347
6348            pandas::DataFrame df(data);
6349
6350            // Test sum
6351            auto sums = df.sum_cols();
6352            double sum_a = sums[0];
6353            double sum_b = sums[1];
6354            if (std::abs(sum_a - 15.0) > 1e-10) {
6355                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : sum A != 15" << std::endl;
6356                throw std::runtime_error("pd_test_dataframe_aggregations failed: sum A != 15");
6357            }
6358            if (std::abs(sum_b - 150.0) > 1e-10) {
6359                std::cout << "  [FAIL] : in pd_test_dataframe_aggregations() : sum B != 150" << std::endl;
6360                throw std::runtime_error("pd_test_dataframe_aggregations failed: sum B != 150");
6361            }
value_counts (pd_test_1_all.cpp:865)
855        std::vector<std::optional<std::string>> values = {
856            std::optional<std::string>("a"),
857            std::optional<std::string>("b"),
858            std::optional<std::string>("a"),
859            std::optional<std::string>("a"),
860            std::optional<std::string>("b"),
861            std::nullopt  // NA not counted
862        };
863        pandas::CategoricalArray arr(values);
864
865        auto [cats, counts] = arr.value_counts();
866
867        // Should have 2 categories
868        if (cats.size() != 2 || counts.size() != 2) {
869            std::cout << "  [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870            throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871        }
872
873        // Find 'a' count
874        int64_t a_count = 0, b_count = 0;
875        for (size_t i = 0; i < cats.size(); ++i) {
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg_to_series (pd_test_2_all.cpp:19154)
19144    pandas::DataFrame df;
19145    df.add_column("a", std::vector<numpy::float64>{1.0, 2.0, 3.0, 4.0});
19146    df.add_column("b", std::vector<numpy::float64>{10.0, 20.0, 30.0, 40.0});
19147
19148    // Dict-simple form: {col: "func"} -> Series
19149    std::map<std::string, std::string> col_funcs;
19150    col_funcs["a"] = "sum";
19151    col_funcs["b"] = "mean";
19152
19153    pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19154
19155    // a.sum() = 10.0, b.mean() = 25.0
19156    check(result.size() == 2, "result_size_2");
19157
19158    // std::map iterates in alphabetical order: a, b
19159    check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19160    check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19161
19162    // Check index labels
19163    check(result.index().get_value_str(0) == "a", "index_0_a");
agg_to_series (pd_test_2_all.cpp:19154)
19144    pandas::DataFrame df;
19145    df.add_column("a", std::vector<numpy::float64>{1.0, 2.0, 3.0, 4.0});
19146    df.add_column("b", std::vector<numpy::float64>{10.0, 20.0, 30.0, 40.0});
19147
19148    // Dict-simple form: {col: "func"} -> Series
19149    std::map<std::string, std::string> col_funcs;
19150    col_funcs["a"] = "sum";
19151    col_funcs["b"] = "mean";
19152
19153    pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19154
19155    // a.sum() = 10.0, b.mean() = 25.0
19156    check(result.size() == 2, "result_size_2");
19157
19158    // std::map iterates in alphabetical order: a, b
19159    check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19160    check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19161
19162    // Check index labels
19163    check(result.index().get_value_str(0) == "a", "index_0_a");
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply_callable (pd_test_5_all.cpp:86745)
86735    auto row_cb = [&](const pandas::ApplyRowInput&) -> pandas::ApplyCellResult {
86736        pandas::ApplyCellResult out;
86737        out.kind = pandas::ApplyCellResult::Float;
86738        out.f    = std::numeric_limits<double>::quiet_NaN();
86739        return out;
86740    };
86741
86742    bool threw = false;
86743    pandas::ApplyResult result;
86744    try {
86745        result = df.apply_callable(/*axis=*/0, row_cb, col_cb);
86746    } catch (const std::exception& e) {
86747        threw = true;
86748        std::cout << "    threw: " << e.what() << "\n";
86749    }
86750    pandas_tests::check(!threw, "case_27.apply_callable_no_throw", local_fail);
86751    pandas_tests::check(col_cb_called, "case_27.col_cb_invoked", local_fail);
86752    pandas_tests::check(observed_idx == 0, "case_27.col_cb_idx_is_0", local_fail);
86753    pandas_tests::check(observed_name == "a", "case_27.col_cb_name_is_a", local_fail);
86754}
apply_resolved_typed (pd_test_5_all.cpp:98141)
98131    switch (cid) {
98132        case CbId::Int:   cb = cb_int(hist);    break;
98133        case CbId::Bool:  cb = cb_bool(hist);   break;
98134        case CbId::Float: cb = cb_float(hist);  break;
98135        case CbId::Str:   cb = cb_string(hist); break;
98136        case CbId::Mixed: cb = cb_mixed(hist);  break;
98137    }
98138
98139    pandas::Result r;
98140    try {
98141        r = s.apply_resolved_typed(cb, hist);
98142    } catch (const std::exception& e) {
98143        std::string tag = std::string("apply src=") + src_name(sid) +
98144                          " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98145        std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98146                  << tag << " unexpected exception: " << e.what() << "\n";
98147        ++pandas_tests::g_failed; ++local_fail;
98148        ++pandas_tests::g_failed; ++local_fail;
98149        ++pandas_tests::g_failed; ++local_fail;
98150        return;
98151    }
apply_with_args (pd_test_3_all.cpp:16993)
16983        }
16984    }
16985
16986    if (!passed) {
16987        throw std::runtime_error("pd_test_apply_axis1_broadcast failed");
16988    }
16989    std::cout << " -> tests passed" << std::endl;
16990}
16991
16992void pd_test_apply_with_args() {
16993    std::cout << "========= DataFrame.apply_with_args() =================";
16994
16995    std::map<std::string, std::vector<double>> data = {
16996        {"A", {1.0, 2.0, 3.0}},
16997        {"B", {4.0, 5.0, 6.0}}
16998    };
16999    pandas::DataFrame df(data);
17000
17001    // Apply with additional argument: multiply sum by factor
17002    auto result = df.apply_with_args(
17003        [](const std::vector<double>& col, double factor) {
applymap (pd_test_1_all.cpp:11194)
11184        void pd_test_func_apply_dataframe_applymap() {
11185            std::cout << "========= DataFrame applymap ==========================";
11186
11187            std::map<std::string, std::vector<double>> data = {
11188                {"A", {1.0, 2.0, 3.0}},
11189                {"B", {4.0, 5.0, 6.0}}
11190            };
11191            pandas::DataFrame df(data);
11192
11193            // applymap applies function element-wise
11194            auto result = df.applymap([](double x) { return x * x; });
11195
11196            bool passed = true;
11197
11198            // Check column A squared
11199            const auto& col_a = result["A"];
11200            std::vector<double> expected_a = {1.0, 4.0, 9.0};
11201            for (size_t i = 0; i < 3; ++i) {
11202                double val = std::stod(col_a.get_value_str(i));
11203                if (!approx_equal(val, expected_a[i])) {
11204                    passed = false;
ewm (pd_test_3_all.cpp:2961)
2951    // Test expanding sum
2952    pandas::DataFrame expanding_sum = df.expanding().sum();
2953    if (expanding_sum.nrows() != 5 || expanding_sum.ncols() != 2) {
2954        throw std::runtime_error("expanding().sum() shape failed");
2955    }
2956
2957    std::cout << " -> tests passed" << std::endl;
2958}
2959
2960void pd_test_3_all_df_ewm() {
2961    std::cout << "========= DataFrame.ewm() ================================";
2962
2963    std::map<std::string, std::vector<double>> data = {
2964        {"A", {1.0, 2.0, 3.0, 4.0, 5.0}},
2965        {"B", {10.0, 20.0, 30.0, 40.0, 50.0}}
2966    };
2967    pandas::DataFrame df(data);
2968
2969    // Test ewm mean with span=3
2970    pandas::DataFrame ewm_mean = df.ewm(std::nullopt, 3.0).mean();
2971    if (ewm_mean.nrows() != 5 || ewm_mean.ncols() != 2) {
expanding (pd_test_1_all.cpp:20770)
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
20762
20763            std::cout << " -> tests passed" << std::endl;
20764        }
20765
20766        void pd_test_expanding_sum() {
20767            std::cout << "========= Expanding sum =========================";
20768
20769            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20770            auto result = s.expanding().sum();
20771
20772            // Cumulative sum: 1, 3, 6, 10, 15
20773            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20774                          std::abs(result[1] - 3.0) < 0.001 &&
20775                          std::abs(result[2] - 6.0) < 0.001 &&
20776                          std::abs(result[3] - 10.0) < 0.001 &&
20777                          std::abs(result[4] - 15.0) < 0.001;
20778            if (!passed) {
20779                std::cout << "  [FAIL] : in pd_test_expanding_sum() : expanding sum values incorrect" << std::endl;
20780                throw std::runtime_error("pd_test_expanding_sum failed: expanding sum values incorrect");
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
map_callable_resolved (pd_test_5_all.cpp:98564)
98554        case MethodId::TransformA0:
98555        case MethodId::TransformA1:  return "transform";
98556    }
98557    return "?";
98558}
98559
98560static pandas::DataFrame run_method(MethodId mid, const pandas::DataFrame& df,
98561                                    CbId cid, Hist& hist) {
98562    if (mid == MethodId::Map) {
98563        MapCb cb = mk_map_cb(cid, hist);
98564        return df.map_callable_resolved(cb, hist);
98565    }
98566    SeriesCb cb = mk_series_cb(cid, hist);
98567    switch (mid) {
98568        case MethodId::ApplyA0:      return df.apply_resolved_typed(cb, hist, 0);
98569        case MethodId::ApplyA1:      return df.apply_resolved_typed(cb, hist, 1);
98570        case MethodId::TransformA0:  return df.transform_callable_resolved(cb, hist, 0);
98571        case MethodId::TransformA1:  return df.transform_callable_resolved(cb, hist, 1);
98572        default: break;
98573    }
98574    return pandas::DataFrame{};
pipe (pd_test_1_all.cpp:11164)
11154            // Pipe applies function to entire Series
11155            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156                auto mean_val = ser.mean();
11157                std::vector<double> result;
11158                for (size_t i = 0; i < ser.size(); ++i) {
11159                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160                }
11161                return pandas::Series<double>(result, ser.name());
11162            };
11163
11164            auto result = s.pipe(add_mean, 10.0);
11165
11166            bool passed = true;
11167            // mean is 2.5, offset is 10.0, so each value + 12.5
11168            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169            for (size_t i = 0; i < result.size(); ++i) {
11170                if (!approx_equal(result[i], expected[i])) {
11171                    passed = false;
11172                    std::cout << "  [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173                    throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174                }
pipe (pd_test_1_all.cpp:11164)
11154            // Pipe applies function to entire Series
11155            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156                auto mean_val = ser.mean();
11157                std::vector<double> result;
11158                for (size_t i = 0; i < ser.size(); ++i) {
11159                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160                }
11161                return pandas::Series<double>(result, ser.name());
11162            };
11163
11164            auto result = s.pipe(add_mean, 10.0);
11165
11166            bool passed = true;
11167            // mean is 2.5, offset is 10.0, so each value + 12.5
11168            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169            for (size_t i = 0; i < result.size(); ++i) {
11170                if (!approx_equal(result[i], expected[i])) {
11171                    passed = false;
11172                    std::cout << "  [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173                    throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174                }
resample (pd_test_1_all.cpp:20321)
20311                "2020-01-01 00:00:00",
20312                "2020-01-01 12:00:00",
20313                "2020-01-02 00:00:00",
20314                "2020-01-02 12:00:00",
20315                "2020-01-03 00:00:00",
20316                "2020-01-03 12:00:00"
20317            };
20318            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20319
20320            // Resample to daily
20321            auto resampler = df.resample("D");
20322            pandas::DataFrame result = resampler.sum();
20323
20324            // Check that we got aggregated results
20325            bool passed = (result.nrows() <= df.nrows());
20326
20327            if (!passed) {
20328                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
20329                throw std::runtime_error("pd_test_timeseries_resample_basic failed");
20330            }
rolling (pd_test_1_all.cpp:20667)
20657#include <vector>
20658#include "../pandas/pd_series.h"
20659
20660namespace dataframe_tests {
20661    namespace dataframe_tests_windowing {
20662
20663        void pd_test_rolling_sum() {
20664            std::cout << "========= Rolling sum ===========================";
20665
20666            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20667            auto result = s.rolling(3).sum();
20668
20669            // Window 3:
20670            // idx 0: [1] -> NaN (not enough values)
20671            // idx 1: [1,2] -> NaN (not enough values)
20672            // idx 2: [1,2,3] -> 6
20673            // idx 3: [2,3,4] -> 9
20674            // idx 4: [3,4,5] -> 12
20675            bool passed = result.size() == 5;
20676            if (!passed) {
20677                std::cout << "  [FAIL] : in pd_test_rolling_sum() : result size should be 5" << std::endl;
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11061            std::cout << " -> tests passed" << std::endl;
11062        }
11063
11064        void pd_test_func_apply_series_transform() {
11065            std::cout << "========= Series transform ============================";
11066
11067            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11068
11069            // Transform must return same shape
11070            auto result = s.transform([](double x) { return x * 2 + 1; });
11071
11072            bool passed = true;
11073            if (result.size() != s.size()) {
11074                passed = false;
11075                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11076                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11077            }
11078
11079            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11080            for (size_t i = 0; i < result.size(); ++i) {
transform_callable_resolved (pd_test_5_all.cpp:98570)
98560static pandas::DataFrame run_method(MethodId mid, const pandas::DataFrame& df,
98561                                    CbId cid, Hist& hist) {
98562    if (mid == MethodId::Map) {
98563        MapCb cb = mk_map_cb(cid, hist);
98564        return df.map_callable_resolved(cb, hist);
98565    }
98566    SeriesCb cb = mk_series_cb(cid, hist);
98567    switch (mid) {
98568        case MethodId::ApplyA0:      return df.apply_resolved_typed(cb, hist, 0);
98569        case MethodId::ApplyA1:      return df.apply_resolved_typed(cb, hist, 1);
98570        case MethodId::TransformA0:  return df.transform_callable_resolved(cb, hist, 0);
98571        case MethodId::TransformA1:  return df.transform_callable_resolved(cb, hist, 1);
98572        default: break;
98573    }
98574    return pandas::DataFrame{};
98575}
98576
98577static Shape expected_shape(MethodId mid, const pandas::DataFrame& src) {
98578    switch (mid) {
98579        case MethodId::ApplyA0:      return {1u, src.ncols()};
98580        case MethodId::ApplyA1:      return {src.nrows(), 1u};
add (pd_test_1_all.cpp:4844)
4834namespace dataframe_tests {
4835    namespace dataframe_tests_arithmetic {
4836
4837        void pd_test_arithmetic_series_named_ops() {
4838            std::cout << "========= Series named ops ======================";
4839
4840            pandas::Series<double> a({1.0, 2.0, 3.0});
4841            pandas::Series<double> b({4.0, 5.0, 6.0});
4842
4843            auto sum = a.add(b);
4844            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4845            if (!passed) {
4846                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4847                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4848            }
4849
4850            auto diff = a.sub(b);
4851            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4852            if (!passed) {
4853                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4834namespace dataframe_tests {
4835    namespace dataframe_tests_arithmetic {
4836
4837        void pd_test_arithmetic_series_named_ops() {
4838            std::cout << "========= Series named ops ======================";
4839
4840            pandas::Series<double> a({1.0, 2.0, 3.0});
4841            pandas::Series<double> b({4.0, 5.0, 6.0});
4842
4843            auto sum = a.add(b);
4844            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4845            if (!passed) {
4846                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4847                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4848            }
4849
4850            auto diff = a.sub(b);
4851            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4852            if (!passed) {
4853                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4834namespace dataframe_tests {
4835    namespace dataframe_tests_arithmetic {
4836
4837        void pd_test_arithmetic_series_named_ops() {
4838            std::cout << "========= Series named ops ======================";
4839
4840            pandas::Series<double> a({1.0, 2.0, 3.0});
4841            pandas::Series<double> b({4.0, 5.0, 6.0});
4842
4843            auto sum = a.add(b);
4844            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4845            if (!passed) {
4846                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4847                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4848            }
4849
4850            auto diff = a.sub(b);
4851            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4852            if (!passed) {
4853                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add_column (pd_test_1_all.cpp:6540)
6530        void pd_test_dataframe_manipulation() {
6531            std::cout << "========= data manipulation ================";
6532
6533            std::map<std::string, std::vector<numpy::int64>> data;
6534            data["A"] = {1, 2, 3};
6535            data["B"] = {4, 5, 6};
6536
6537            pandas::DataFrame df(data);
6538
6539            // Test add_column
6540            df.add_column<numpy::int64>("C", {7, 8, 9});
6541            if (df.ncols() != 3) {
6542                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543                throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544            }
6545
6546            // Test pop
6547            auto popped = df.pop("C");
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
add_column_nullable (pd_test_3_all.cpp:953)
943        df.add_column<pandas::Nullable<bool>>("A", {true, false, pandas::None});
944        df.add_column<std::int64_t>("B", {1, 2, 3});
945        if (df.ncols() != 2 || df.nrows() != 3) {
946            throw std::runtime_error("Nullable<bool> add_column: shape");
947        }
948    }
949
950    // Case E: add_column_nullable<int64> + NA_INT64
951    {
952        pandas::DataFrame df;
953        df.add_column_nullable<int64_t>("int_na",
954            {1, pandas::NA_INT64, 3, pandas::NA_INT64});
955        df.add_column_nullable<bool>("bool_na",
956            {true, pandas::NA_BOOL, false, pandas::NA_BOOL});
957        df.add_column_nullable<std::string>("str_na",
958            {std::string("a"), pandas::NA_STRING, std::string("c"), pandas::NA_STRING});
959
960        if (df.ncols() != 3 || df.nrows() != 4) {
961            throw std::runtime_error("add_column_nullable: shape");
962        }
963    }
add_column_with_dtype_override (pd_test_2_all.cpp:19266)
19256// =====================================================================
19257// Test: sum() converts bool columns to int64 counts
19258// =====================================================================
19259
19260void pd_test_agg_dtype_sum_bool_to_int() {
19261    std::cout << "  -- pd_test_agg_dtype_sum_bool_to_int --" << std::endl;
19262
19263    pandas::DataFrame df;
19264    // Create a bool column using string values with dtype override
19265    df.add_column_with_dtype_override("flag", std::vector<std::string>{"True", "False", "True"}, "bool");
19266
19267    auto result = df.sum(0, true, false, 0);
19268    check(approx_eq(result.iat(static_cast<size_t>(0)), 2.0), "bool_count_true_eq_2");
19269    check(result.dtype_name() == "int64", "bool_sum_dtype_int64");
19270}
19271
19272// =====================================================================
19273// Test: sum() preserves nullable Int64 dtype
19274// =====================================================================
add_prefix (pd_test_2_all.cpp:4)
 1// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
 2// dataframe_tests/pd_test_add_prefix.cpp
 3// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
 4#include <iostream>
 5#include <stdexcept>
 6#include <vector>
 7#include <string>
 8#include <map>
 9#include "../pandas/pd_dataframe.h"
10#include "../pandas/pd_groupby.h"
11
12// CRITICAL: No using namespace directives
add_suffix (pd_test_2_all.cpp:4)
 1// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
 2// dataframe_tests/pd_test_add_prefix.cpp
 3// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
 4#include <iostream>
 5#include <stdexcept>
 6#include <vector>
 7#include <string>
 8#include <map>
 9#include "../pandas/pd_dataframe.h"
10#include "../pandas/pd_groupby.h"
11
12// CRITICAL: No using namespace directives
div (pd_test_1_all.cpp:4865)
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870            }
4871
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870            }
4871
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870            }
4871
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
divide (pd_test_3_all.cpp:555)
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
554
555    // Test divide()
556    pandas::Series<numpy::float64> div_result = s1.divide(s2);
557    if (div_result.size() != 4) {
558        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560    }
561    // 10/2=5
562    if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565    }
divide (pd_test_3_all.cpp:555)
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
554
555    // Test divide()
556    pandas::Series<numpy::float64> div_result = s1.divide(s2);
557    if (div_result.size() != 4) {
558        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560    }
561    // 10/2=5
562    if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565    }
dot (pd_test_1_all.cpp:22594)
22584        std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22585        return 0;
22586    }
22587
22588} // namespace dataframe_tests
22589// ------------------- pd_test_all_any.cpp (end) -----------------------------
22590
22591// ------------------- pd_test_dot.cpp (start) -----------------------------
22592// dataframe_tests/pd_test_dot.cpp
22593// Test DataFrame.dot() method - matrix multiplication
22594
22595#include <iostream>
22596#include <stdexcept>
22597#include <cmath>
22598#include "../pandas/pd_dataframe.h"
22599
22600// CRITICAL: No using namespace directives
22601
22602namespace dataframe_tests {
22603    namespace dataframe_tests_dot {
dot (pd_test_1_all.cpp:22594)
22584        std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22585        return 0;
22586    }
22587
22588} // namespace dataframe_tests
22589// ------------------- pd_test_all_any.cpp (end) -----------------------------
22590
22591// ------------------- pd_test_dot.cpp (start) -----------------------------
22592// dataframe_tests/pd_test_dot.cpp
22593// Test DataFrame.dot() method - matrix multiplication
22594
22595#include <iostream>
22596#include <stdexcept>
22597#include <cmath>
22598#include "../pandas/pd_dataframe.h"
22599
22600// CRITICAL: No using namespace directives
22601
22602namespace dataframe_tests {
22603    namespace dataframe_tests_dot {
floordiv (pd_test_1_all.cpp:4881)
4871            std::cout << " -> tests passed" << std::endl;
4872        }
4873
4874        void pd_test_arithmetic_series_floordiv_mod() {
4875            std::cout << "========= Series floordiv/mod ===================";
4876
4877            pandas::Series<double> a({7.0, 8.0, 9.0});
4878            pandas::Series<double> b({2.0, 3.0, 4.0});
4879
4880            auto fd = a.floordiv(b);
4881            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4882            if (!passed) {
4883                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4884                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4885            }
4886
4887            auto m = a.mod(b);
4888            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4889            if (!passed) {
4890                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
floordiv (pd_test_1_all.cpp:4881)
4871            std::cout << " -> tests passed" << std::endl;
4872        }
4873
4874        void pd_test_arithmetic_series_floordiv_mod() {
4875            std::cout << "========= Series floordiv/mod ===================";
4876
4877            pandas::Series<double> a({7.0, 8.0, 9.0});
4878            pandas::Series<double> b({2.0, 3.0, 4.0});
4879
4880            auto fd = a.floordiv(b);
4881            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4882            if (!passed) {
4883                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4884                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4885            }
4886
4887            auto m = a.mod(b);
4888            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4889            if (!passed) {
4890                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
mod (pd_test_1_all.cpp:4888)
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893            }
4894
4895            // Scalar operations
4896            auto fd_scalar = a.floordiv(2.0);
4897            passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898            if (!passed) {
mod (pd_test_1_all.cpp:4888)
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893            }
4894
4895            // Scalar operations
4896            auto fd_scalar = a.floordiv(2.0);
4897            passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898            if (!passed) {
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
multiindex (pd_test_1_all.cpp:27024)
27014            pandas::DataFrame df(data);
27015
27016            auto result = df.value_counts();
27017            auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019            if (!counts.has_multiindex()) {
27020                std::cout << "  [FAIL] : expected MultiIndex" << std::endl;
27021                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022            }
27023
27024            const auto& midx = counts.multiindex();
27025
27026            // Should have 2 levels
27027            if (midx.nlevels() != 2) {
27028                std::cout << "  [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
27030            }
27031
27032            std::cout << " -> tests passed" << std::endl;
27033        }
multiply (pd_test_3_all.cpp:543)
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
542
543    // Test multiply()
544    pandas::Series<double> mul_result = s1.multiply(s2);
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
multiply (pd_test_3_all.cpp:543)
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
542
543    // Test multiply()
544    pandas::Series<double> mul_result = s1.multiply(s2);
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
pow (pd_test_1_all.cpp:4911)
4901            }
4902
4903            std::cout << " -> tests passed" << std::endl;
4904        }
4905
4906        void pd_test_arithmetic_series_pow() {
4907            std::cout << "========= Series pow ============================";
4908
4909            pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911            auto p = a.pow(2.0);
4912            bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913            if (!passed) {
4914                std::cout << "  [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915                throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916            }
4917
4918            // Series pow Series
4919            pandas::Series<double> exp({1.0, 2.0, 0.5});
4920            auto p2 = a.pow(exp);
4921            passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001;  // 3^2=9
pow (pd_test_1_all.cpp:4911)
4901            }
4902
4903            std::cout << " -> tests passed" << std::endl;
4904        }
4905
4906        void pd_test_arithmetic_series_pow() {
4907            std::cout << "========= Series pow ============================";
4908
4909            pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911            auto p = a.pow(2.0);
4912            bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913            if (!passed) {
4914                std::cout << "  [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915                throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916            }
4917
4918            // Series pow Series
4919            pandas::Series<double> exp({1.0, 2.0, 0.5});
4920            auto p2 = a.pow(exp);
4921            passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001;  // 3^2=9
radd (pd_test_2_all.cpp:7440)
7430            if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431            return std::abs(a - b) < tol;
7432        }
7433
7434        // Helper to get double value from DataFrame at position
7435        double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436            return df.iloc<numpy::float64>(row, col);
7437        }
7438
7439        void pd_test_radd_scalar() {
7440            std::cout << "========= radd() with scalar =====================";
7441
7442            // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443            std::map<std::string, std::vector<double>> data = {
7444                {"angles", {0.0, 3.0, 4.0}},
7445                {"degrees", {360.0, 180.0, 360.0}}
7446            };
7447            pandas::DataFrame df(data);
7448
7449            // df.radd(1) should be equivalent to 1 + df
7450            pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430            if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431            return std::abs(a - b) < tol;
7432        }
7433
7434        // Helper to get double value from DataFrame at position
7435        double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436            return df.iloc<numpy::float64>(row, col);
7437        }
7438
7439        void pd_test_radd_scalar() {
7440            std::cout << "========= radd() with scalar =====================";
7441
7442            // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443            std::map<std::string, std::vector<double>> data = {
7444                {"angles", {0.0, 3.0, 4.0}},
7445                {"degrees", {360.0, 180.0, 360.0}}
7446            };
7447            pandas::DataFrame df(data);
7448
7449            // df.radd(1) should be equivalent to 1 + df
7450            pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430            if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431            return std::abs(a - b) < tol;
7432        }
7433
7434        // Helper to get double value from DataFrame at position
7435        double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436            return df.iloc<numpy::float64>(row, col);
7437        }
7438
7439        void pd_test_radd_scalar() {
7440            std::cout << "========= radd() with scalar =====================";
7441
7442            // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443            std::map<std::string, std::vector<double>> data = {
7444                {"angles", {0.0, 3.0, 4.0}},
7445                {"degrees", {360.0, 180.0, 360.0}}
7446            };
7447            pandas::DataFrame df(data);
7448
7449            // df.radd(1) should be equivalent to 1 + df
7450            pandas::DataFrame result = df.radd(1.0);
rdiv (pd_test_2_all.cpp:7713)
7703            }
7704
7705            if (!passed) {
7706                throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707            }
7708
7709            std::cout << " -> tests passed" << std::endl;
7710        }
7711
7712        void pd_test_rdiv_scalar() {
7713            std::cout << "========= rdiv() with scalar =====================";
7714
7715            // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716            std::map<std::string, std::vector<double>> data = {
7717                {"angles", {0.0, 3.0, 4.0}},
7718                {"degrees", {360.0, 180.0, 360.0}}
7719            };
7720            pandas::DataFrame df(data);
7721
7722            // df.rdiv(10) = 10 / df
7723            pandas::DataFrame result = df.rdiv(10.0);
rdiv (pd_test_2_all.cpp:7713)
7703            }
7704
7705            if (!passed) {
7706                throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707            }
7708
7709            std::cout << " -> tests passed" << std::endl;
7710        }
7711
7712        void pd_test_rdiv_scalar() {
7713            std::cout << "========= rdiv() with scalar =====================";
7714
7715            // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716            std::map<std::string, std::vector<double>> data = {
7717                {"angles", {0.0, 3.0, 4.0}},
7718                {"degrees", {360.0, 180.0, 360.0}}
7719            };
7720            pandas::DataFrame df(data);
7721
7722            // df.rdiv(10) = 10 / df
7723            pandas::DataFrame result = df.rdiv(10.0);
rdiv (pd_test_2_all.cpp:7713)
7703            }
7704
7705            if (!passed) {
7706                throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707            }
7708
7709            std::cout << " -> tests passed" << std::endl;
7710        }
7711
7712        void pd_test_rdiv_scalar() {
7713            std::cout << "========= rdiv() with scalar =====================";
7714
7715            // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716            std::map<std::string, std::vector<double>> data = {
7717                {"angles", {0.0, 3.0, 4.0}},
7718                {"degrees", {360.0, 180.0, 360.0}}
7719            };
7720            pandas::DataFrame df(data);
7721
7722            // df.rdiv(10) = 10 / df
7723            pandas::DataFrame result = df.rdiv(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899            }
7900
7901            if (!passed) {
7902                throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903            }
7904
7905            std::cout << " -> tests passed" << std::endl;
7906        }
7907
7908        void pd_test_rfloordiv_scalar() {
7909            std::cout << "========= rfloordiv() with scalar ================";
7910
7911            std::map<std::string, std::vector<double>> data = {
7912                {"A", {3.0, 4.0}},
7913                {"B", {7.0, 8.0}}
7914            };
7915            pandas::DataFrame df(data);
7916
7917            // df.rfloordiv(10) = 10 // df (floor division)
7918            pandas::DataFrame result = df.rfloordiv(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899            }
7900
7901            if (!passed) {
7902                throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903            }
7904
7905            std::cout << " -> tests passed" << std::endl;
7906        }
7907
7908        void pd_test_rfloordiv_scalar() {
7909            std::cout << "========= rfloordiv() with scalar ================";
7910
7911            std::map<std::string, std::vector<double>> data = {
7912                {"A", {3.0, 4.0}},
7913                {"B", {7.0, 8.0}}
7914            };
7915            pandas::DataFrame df(data);
7916
7917            // df.rfloordiv(10) = 10 // df (floor division)
7918            pandas::DataFrame result = df.rfloordiv(10.0);
rmod (pd_test_2_all.cpp:8121)
8111            }
8112
8113            if (!passed) {
8114                throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115            }
8116
8117            std::cout << " -> tests passed" << std::endl;
8118        }
8119
8120        void pd_test_rmod_scalar() {
8121            std::cout << "========= rmod() with scalar =====================";
8122
8123            std::map<std::string, std::vector<double>> data = {
8124                {"A", {3.0, 4.0}}
8125            };
8126            pandas::DataFrame df(data);
8127
8128            // df.rmod(10) = 10 % df
8129            pandas::DataFrame result = df.rmod(10.0);
8130
8131            bool passed = true;
rmod (pd_test_2_all.cpp:8121)
8111            }
8112
8113            if (!passed) {
8114                throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115            }
8116
8117            std::cout << " -> tests passed" << std::endl;
8118        }
8119
8120        void pd_test_rmod_scalar() {
8121            std::cout << "========= rmod() with scalar =====================";
8122
8123            std::map<std::string, std::vector<double>> data = {
8124                {"A", {3.0, 4.0}}
8125            };
8126            pandas::DataFrame df(data);
8127
8128            // df.rmod(10) = 10 % df
8129            pandas::DataFrame result = df.rmod(10.0);
8130
8131            bool passed = true;
rmul (pd_test_2_all.cpp:7591)
7581            }
7582
7583            if (!passed) {
7584                throw std::runtime_error("pd_test_rsub_dataframe failed");
7585            }
7586
7587            std::cout << " -> tests passed" << std::endl;
7588        }
7589
7590        void pd_test_rmul_scalar() {
7591            std::cout << "========= rmul() with scalar =====================";
7592
7593            std::map<std::string, std::vector<double>> data = {
7594                {"A", {2.0, 3.0}},
7595                {"B", {4.0, 5.0}}
7596            };
7597            pandas::DataFrame df(data);
7598
7599            // df.rmul(10) = 10 * df
7600            pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581            }
7582
7583            if (!passed) {
7584                throw std::runtime_error("pd_test_rsub_dataframe failed");
7585            }
7586
7587            std::cout << " -> tests passed" << std::endl;
7588        }
7589
7590        void pd_test_rmul_scalar() {
7591            std::cout << "========= rmul() with scalar =====================";
7592
7593            std::map<std::string, std::vector<double>> data = {
7594                {"A", {2.0, 3.0}},
7595                {"B", {4.0, 5.0}}
7596            };
7597            pandas::DataFrame df(data);
7598
7599            // df.rmul(10) = 10 * df
7600            pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581            }
7582
7583            if (!passed) {
7584                throw std::runtime_error("pd_test_rsub_dataframe failed");
7585            }
7586
7587            std::cout << " -> tests passed" << std::endl;
7588        }
7589
7590        void pd_test_rmul_scalar() {
7591            std::cout << "========= rmul() with scalar =====================";
7592
7593            std::map<std::string, std::vector<double>> data = {
7594                {"A", {2.0, 3.0}},
7595                {"B", {4.0, 5.0}}
7596            };
7597            pandas::DataFrame df(data);
7598
7599            // df.rmul(10) = 10 * df
7600            pandas::DataFrame result = df.rmul(10.0);
rpow (pd_test_2_all.cpp:8327)
8317            }
8318
8319            if (!passed) {
8320                throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321            }
8322
8323            std::cout << " -> tests passed" << std::endl;
8324        }
8325
8326        void pd_test_rpow_scalar() {
8327            std::cout << "========= rpow() with scalar =====================";
8328
8329            std::map<std::string, std::vector<double>> data = {
8330                {"A", {2.0, 3.0}},
8331                {"B", {0.0, 1.0}}
8332            };
8333            pandas::DataFrame df(data);
8334
8335            // df.rpow(2) = 2 ** df
8336            pandas::DataFrame result = df.rpow(2.0);
rpow (pd_test_2_all.cpp:8327)
8317            }
8318
8319            if (!passed) {
8320                throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321            }
8322
8323            std::cout << " -> tests passed" << std::endl;
8324        }
8325
8326        void pd_test_rpow_scalar() {
8327            std::cout << "========= rpow() with scalar =====================";
8328
8329            std::map<std::string, std::vector<double>> data = {
8330                {"A", {2.0, 3.0}},
8331                {"B", {0.0, 1.0}}
8332            };
8333            pandas::DataFrame df(data);
8334
8335            // df.rpow(2) = 2 ** df
8336            pandas::DataFrame result = df.rpow(2.0);
rsub (pd_test_2_all.cpp:7520)
7510            }
7511
7512            if (!passed) {
7513                throw std::runtime_error("pd_test_radd_dataframe failed");
7514            }
7515
7516            std::cout << " -> tests passed" << std::endl;
7517        }
7518
7519        void pd_test_rsub_scalar() {
7520            std::cout << "========= rsub() with scalar =====================";
7521
7522            std::map<std::string, std::vector<double>> data = {
7523                {"A", {1.0, 2.0, 3.0}},
7524                {"B", {4.0, 5.0, 6.0}}
7525            };
7526            pandas::DataFrame df(data);
7527
7528            // df.rsub(10) = 10 - df
7529            pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510            }
7511
7512            if (!passed) {
7513                throw std::runtime_error("pd_test_radd_dataframe failed");
7514            }
7515
7516            std::cout << " -> tests passed" << std::endl;
7517        }
7518
7519        void pd_test_rsub_scalar() {
7520            std::cout << "========= rsub() with scalar =====================";
7521
7522            std::map<std::string, std::vector<double>> data = {
7523                {"A", {1.0, 2.0, 3.0}},
7524                {"B", {4.0, 5.0, 6.0}}
7525            };
7526            pandas::DataFrame df(data);
7527
7528            // df.rsub(10) = 10 - df
7529            pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510            }
7511
7512            if (!passed) {
7513                throw std::runtime_error("pd_test_radd_dataframe failed");
7514            }
7515
7516            std::cout << " -> tests passed" << std::endl;
7517        }
7518
7519        void pd_test_rsub_scalar() {
7520            std::cout << "========= rsub() with scalar =====================";
7521
7522            std::map<std::string, std::vector<double>> data = {
7523                {"A", {1.0, 2.0, 3.0}},
7524                {"B", {4.0, 5.0, 6.0}}
7525            };
7526            pandas::DataFrame df(data);
7527
7528            // df.rsub(10) = 10 - df
7529            pandas::DataFrame result = df.rsub(10.0);
rtruediv (pd_test_2_all.cpp:7795)
7785            }
7786
7787            if (!passed) {
7788                throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789            }
7790
7791            std::cout << " -> tests passed" << std::endl;
7792        }
7793
7794        void pd_test_rtruediv_scalar() {
7795            std::cout << "========= rtruediv() with scalar =================";
7796
7797            std::map<std::string, std::vector<double>> data = {
7798                {"A", {2.0, 4.0}}
7799            };
7800            pandas::DataFrame df(data);
7801
7802            // rtruediv is alias for rdiv
7803            pandas::DataFrame result = df.rtruediv(10.0);
7804
7805            bool passed = true;
rtruediv (pd_test_2_all.cpp:7795)
7785            }
7786
7787            if (!passed) {
7788                throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789            }
7790
7791            std::cout << " -> tests passed" << std::endl;
7792        }
7793
7794        void pd_test_rtruediv_scalar() {
7795            std::cout << "========= rtruediv() with scalar =================";
7796
7797            std::map<std::string, std::vector<double>> data = {
7798                {"A", {2.0, 4.0}}
7799            };
7800            pandas::DataFrame df(data);
7801
7802            // rtruediv is alias for rdiv
7803            pandas::DataFrame result = df.rtruediv(10.0);
7804
7805            bool passed = true;
sub (pd_test_1_all.cpp:4851)
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
subtract (pd_test_3_all.cpp:531)
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
subtract (pd_test_3_all.cpp:531)
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
truediv (pd_test_3_all.cpp:524)
514    }
515
516    std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
truediv (pd_test_3_all.cpp:524)
514    }
515
516    std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
compare (pd_test_1_all.cpp:13989)
13979            if (!approx_equal(std::stod(b_col.get_value_str(0)), 10.0)) {
13980                passed = false;
13981                std::cout << "  [FAIL] : in pd_test_joining_update() : column B was changed" << std::endl;
13982                throw std::runtime_error("pd_test_joining_update failed: B changed");
13983            }
13984
13985            std::cout << " -> tests passed" << std::endl;
13986        }
13987
13988        // =====================================================================
13989        // compare() Tests
13990        // =====================================================================
13991
13992        void pd_test_joining_compare() {
13993            std::cout << "========= compare =====================================";
13994
13995            std::map<std::string, std::vector<double>> left_data = {
13996                {"A", {1.0, 2.0, 3.0}},
13997                {"B", {10.0, 20.0, 30.0}}
13998            };
13999            pandas::DataFrame left(left_data);
eq (pd_test_2_all.cpp:19679)
19670    std::vector<pandas::Series<numpy::float64>> cols;
19671    cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672    cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673    pandas::DataFrame df(cols, {"A", "B"});
19674
19675    pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676    s.set_index(std::make_unique<pandas::Index<std::string>>(
19677        std::vector<std::string>{"A", "B"}));
19678
19679    auto result = df.eq(s, 1);
19680
19681    check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682    check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683    check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684    check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689    std::cout << "  -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19679)
19670    std::vector<pandas::Series<numpy::float64>> cols;
19671    cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672    cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673    pandas::DataFrame df(cols, {"A", "B"});
19674
19675    pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676    s.set_index(std::make_unique<pandas::Index<std::string>>(
19677        std::vector<std::string>{"A", "B"}));
19678
19679    auto result = df.eq(s, 1);
19680
19681    check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682    check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683    check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684    check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689    std::cout << "  -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19679)
19670    std::vector<pandas::Series<numpy::float64>> cols;
19671    cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672    cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673    pandas::DataFrame df(cols, {"A", "B"});
19674
19675    pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676    s.set_index(std::make_unique<pandas::Index<std::string>>(
19677        std::vector<std::string>{"A", "B"}));
19678
19679    auto result = df.eq(s, 1);
19680
19681    check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682    check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683    check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684    check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689    std::cout << "  -- pd_test_broadcasting_mul_scalar --" << std::endl;
equals (pd_test_1_all.cpp:5866)
5856    std::cout << "========= equals ======================================";
5857
5858    pandas::CategoricalArray arr1({"a", "b", "a"});
5859    pandas::CategoricalArray arr2({"a", "b", "a"});
5860    pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862    pandas::CategoricalIndex idx1(arr1);
5863    pandas::CategoricalIndex idx2(arr2);
5864    pandas::CategoricalIndex idx3(arr3);
5865
5866    bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867    if (!passed) {
5868        std::cout << "  [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869        throw std::runtime_error("pd_test_categorical_index_equals failed");
5870    }
5871
5872    std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
ge (pd_test_3_all.cpp:303)
293    }
294
295    std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303    std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305    std::map<std::string, std::vector<double>> data1 = {
306        {"A", {1.0, 2.0, 3.0}},
307        {"B", {4.0, 5.0, 6.0}}
308    };
309    std::map<std::string, std::vector<double>> data2 = {
310        {"A", {1.0, 3.0, 3.0}},
311        {"B", {4.0, 4.0, 7.0}}
312    };
313    pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293    }
294
295    std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303    std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305    std::map<std::string, std::vector<double>> data1 = {
306        {"A", {1.0, 2.0, 3.0}},
307        {"B", {4.0, 5.0, 6.0}}
308    };
309    std::map<std::string, std::vector<double>> data2 = {
310        {"A", {1.0, 3.0, 3.0}},
311        {"B", {4.0, 4.0, 7.0}}
312    };
313    pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293    }
294
295    std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303    std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305    std::map<std::string, std::vector<double>> data1 = {
306        {"A", {1.0, 2.0, 3.0}},
307        {"B", {4.0, 5.0, 6.0}}
308    };
309    std::map<std::string, std::vector<double>> data2 = {
310        {"A", {1.0, 3.0, 3.0}},
311        {"B", {4.0, 4.0, 7.0}}
312    };
313    pandas::DataFrame df1(data1);
gen (pd_test_5_all.cpp:35852)
35842    double pc = pct_change_pc(a, b);
35843    double pd = pct_change_pd(a, b);
35844    pandas_tests::check(std::abs(pc - pd) < 1e-12,
35845                        "case_12.formulas_within_ULP", local_fail);
35846}
35847
35848void bin_edge_412638_case_13_entropy_pct_change_invariance(int& local_fail) {
35849    // Generate prices via deterministic walk; compute returns by both
35850    // formulas; bin both; entropy should be IDENTICAL (bin assignments
35851    // not shifted by ULP-scale formula drift). Cycle-1 finding.
35852    std::mt19937_64 gen(42);
35853    std::normal_distribution<double> nd(0.0003, 0.02);
35854    std::vector<double> prices;
35855    prices.reserve(500);
35856    double s = 100.0;
35857    for (int i = 0; i < 500; ++i) {
35858        if (i > 0) s = s * std::exp(nd(gen));
35859        prices.push_back(s);
35860    }
35861    std::vector<double> r_pc, r_pd;
35862    for (size_t i = 1; i < prices.size(); ++i) {
gt (pd_test_3_all.cpp:344)
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348        throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349    }
350
351    // Test ge()
352    pandas::DataFrame ge_result = df1.ge(df2);
353    if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348        throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349    }
350
351    // Test ge()
352    pandas::DataFrame ge_result = df1.ge(df2);
353    if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348        throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349    }
350
351    // Test ge()
352    pandas::DataFrame ge_result = df1.ge(df2);
353    if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
levels (pd_test_2_all.cpp:9787)
9777            pandas::DataFrame df(data);
9778
9779            std::vector<std::string> hier_index = {
9780                "Final exam:History:January",
9781                "Final exam:Geography:February",
9782                "Coursework:History:March",
9783                "Coursework:Geography:April"
9784            };
9785            df.set_index(std::make_unique<pandas::Index<std::string>>(hier_index));
9786
9787            // Default: swap last two levels (i=-2, j=-1)
9788            pandas::DataFrame result = df.swaplevel();
9789
9790            std::string idx0 = result.index().get_value_str(0);
9791            std::string idx1 = result.index().get_value_str(1);
9792            std::string idx2 = result.index().get_value_str(2);
9793            std::string idx3 = result.index().get_value_str(3);
9794
9795            bool passed = (idx0 == "Final exam:January:History" &&
9796                           idx1 == "Final exam:February:Geography" &&
9797                           idx2 == "Coursework:March:History" &&
levels (pd_test_2_all.cpp:9787)
9777            pandas::DataFrame df(data);
9778
9779            std::vector<std::string> hier_index = {
9780                "Final exam:History:January",
9781                "Final exam:Geography:February",
9782                "Coursework:History:March",
9783                "Coursework:Geography:April"
9784            };
9785            df.set_index(std::make_unique<pandas::Index<std::string>>(hier_index));
9786
9787            // Default: swap last two levels (i=-2, j=-1)
9788            pandas::DataFrame result = df.swaplevel();
9789
9790            std::string idx0 = result.index().get_value_str(0);
9791            std::string idx1 = result.index().get_value_str(1);
9792            std::string idx2 = result.index().get_value_str(2);
9793            std::string idx3 = result.index().get_value_str(3);
9794
9795            bool passed = (idx0 == "Final exam:January:History" &&
9796                           idx1 == "Final exam:February:Geography" &&
9797                           idx2 == "Coursework:March:History" &&
lt (pd_test_3_all.cpp:330)
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313    pandas::DataFrame df1(data1);
314    pandas::DataFrame df2(data2);
315
316    // Test eq()
317    pandas::DataFrame eq_result = df1.eq(df2);
318    if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313    pandas::DataFrame df1(data1);
314    pandas::DataFrame df2(data2);
315
316    // Test eq()
317    pandas::DataFrame eq_result = df1.eq(df2);
318    if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313    pandas::DataFrame df1(data1);
314    pandas::DataFrame df2(data2);
315
316    // Test eq()
317    pandas::DataFrame eq_result = df1.eq(df2);
318    if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
rank (pd_test_1_all.cpp:6451)
6441        // =====================================================================
6442        // Test: Rank
6443        // =====================================================================
6444        void pd_test_dataframe_rank() {
6445            std::cout << "========= rank =============================";
6446
6447            // Test Series rank with default method (average)
6448            {
6449                std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450                pandas::Series<double> s(data, "test");
6451                auto ranked = s.rank();
6452
6453                // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454                // Ranks (average): 1.5, 1.5, 3, 4, 5
6455                // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456                double r0 = std::stod(ranked.get_value_str(0));  // 3.0 -> rank 3
6457                double r1 = std::stod(ranked.get_value_str(1));  // 1.0 -> rank 1.5
6458
6459                if (std::abs(r0 - 3.0) > 1e-10) {
6460                    std::cout << "  [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461                    throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
sort_index (pd_test_3_all.cpp:583)
573    // 10/2=5
574    if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577    }
578
579    std::cout << " -> tests passed" << std::endl;
580}
581
582void pd_test_3_all_series_sort_index() {
583    std::cout << "========= Series.sort_index() ========================";
584
585    // NOTE: Series.sort_index() has an implementation issue:
586    // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587    // This test verifies the function signature exists.
588    // When the implementation is fixed, this test should be updated.
589
590    std::vector<double> vals = {30.0, 10.0, 20.0};
591    pandas::Series<double> s(vals, "test");
592
593    // Verify the Series was created correctly
sort_index (pd_test_3_all.cpp:583)
573    // 10/2=5
574    if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577    }
578
579    std::cout << " -> tests passed" << std::endl;
580}
581
582void pd_test_3_all_series_sort_index() {
583    std::cout << "========= Series.sort_index() ========================";
584
585    // NOTE: Series.sort_index() has an implementation issue:
586    // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587    // This test verifies the function signature exists.
588    // When the implementation is fixed, this test should be updated.
589
590    std::vector<double> vals = {30.0, 10.0, 20.0};
591    pandas::Series<double> s(vals, "test");
592
593    // Verify the Series was created correctly
sort_index (pd_test_3_all.cpp:583)
573    // 10/2=5
574    if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577    }
578
579    std::cout << " -> tests passed" << std::endl;
580}
581
582void pd_test_3_all_series_sort_index() {
583    std::cout << "========= Series.sort_index() ========================";
584
585    // NOTE: Series.sort_index() has an implementation issue:
586    // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587    // This test verifies the function signature exists.
588    // When the implementation is fixed, this test should be updated.
589
590    std::vector<double> vals = {30.0, 10.0, 20.0};
591    pandas::Series<double> s(vals, "test");
592
593    // Verify the Series was created correctly
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
sort_values_by_transformed (pd_test_2_all.cpp:22232)
22222    }
22223    std::cout << "====================================== [OK] pd_test_series_dtype_inference test suite ========================== " << std::endl;
22224    return 0;
22225}
22226
22227} // namespace dataframe_tests
22228// ------------------- pd_test_series_dtype_inference.cpp (end) -----------------------------
22229
22230// ------------------- pd_test_sort_key.cpp (start) -----------------------------
22231// pd_test_sort_key.cpp - Tests for sort_values key function support
22232// Tests sort_values_by_transformed() and resolve_sort_columns_multiindex()
22233
22234#include <iostream>
22235#include <string>
22236#include <vector>
22237#include <cmath>
22238#include <numeric>
22239
22240#include "../pandas/pd_dataframe.h"
22241
22242namespace dataframe_tests {
T (pd_test_1_all.cpp:128)
118            throw std::runtime_error("pd_test_boolean_array_kleene_and failed: NA & F");
119        }
120
121        std::cout << " -> tests passed" << std::endl;
122    }
123
124    void pd_test_boolean_array_kleene_or() {
125        std::cout << "========= BooleanArray: Kleene OR ======================= ";
126
127        // Kleene OR truth table:
128        // T | T = T, T | F = T, T | NA = T (True dominates)
129        // F | T = T, F | F = F, F | NA = NA
130        // NA | T = T, NA | F = NA, NA | NA = NA
131
132        pandas::BooleanArray t({std::optional<bool>(true)});
133        pandas::BooleanArray f({std::optional<bool>(false)});
134        pandas::BooleanArray na({std::nullopt});
135
136        // T | NA = T (True dominates)
137        auto tna = (t | na);
138        if (!tna[0].has_value() || !tna[0].value()) {
explode (pd_test_1_all.cpp:6868)
6858                }
6859            }
6860
6861            // Test explode
6862            {
6863                std::map<std::string, std::vector<std::string>> data;
6864                data["id"] = {"1", "2"};
6865                data["tags"] = {"a,b,c", "d,e"};
6866                pandas::DataFrame df(data);
6867
6868                auto exploded = df.explode("tags");
6869                if (exploded.nrows() != 5) {  // 3 + 2 = 5 rows
6870                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : explode nrows != 5, got " << exploded.nrows() << std::endl;
6871                    throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872                }
6873            }
6874
6875            // Test squeeze
6876            {
6877                std::map<std::string, std::vector<int>> data;
6878                data["A"] = {1};
melt (pd_test_1_all.cpp:6846)
6836            }
6837
6838            // Test melt
6839            {
6840                std::map<std::string, std::vector<int>> data;
6841                data["id"] = {1, 2};
6842                data["A"] = {10, 20};
6843                data["B"] = {30, 40};
6844                pandas::DataFrame df(data);
6845
6846                auto melted = df.melt({"id"});
6847                if (melted.nrows() != 4) {  // 2 ids * 2 value columns
6848                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850                }
6851                if (!melted.has_column("variable")) {
6852                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854                }
6855                if (!melted.has_column("value")) {
6856                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836            }
6837
6838            // Test melt
6839            {
6840                std::map<std::string, std::vector<int>> data;
6841                data["id"] = {1, 2};
6842                data["A"] = {10, 20};
6843                data["B"] = {30, 40};
6844                pandas::DataFrame df(data);
6845
6846                auto melted = df.melt({"id"});
6847                if (melted.nrows() != 4) {  // 2 ids * 2 value columns
6848                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850                }
6851                if (!melted.has_column("variable")) {
6852                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854                }
6855                if (!melted.has_column("value")) {
6856                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836            }
6837
6838            // Test melt
6839            {
6840                std::map<std::string, std::vector<int>> data;
6841                data["id"] = {1, 2};
6842                data["A"] = {10, 20};
6843                data["B"] = {30, 40};
6844                pandas::DataFrame df(data);
6845
6846                auto melted = df.melt({"id"});
6847                if (melted.nrows() != 4) {  // 2 ids * 2 value columns
6848                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850                }
6851                if (!melted.has_column("variable")) {
6852                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854                }
6855                if (!melted.has_column("value")) {
6856                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
melt (pd_test_1_all.cpp:6846)
6836            }
6837
6838            // Test melt
6839            {
6840                std::map<std::string, std::vector<int>> data;
6841                data["id"] = {1, 2};
6842                data["A"] = {10, 20};
6843                data["B"] = {30, 40};
6844                pandas::DataFrame df(data);
6845
6846                auto melted = df.melt({"id"});
6847                if (melted.nrows() != 4) {  // 2 ids * 2 value columns
6848                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt nrows != 4, got " << melted.nrows() << std::endl;
6849                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt nrows");
6850                }
6851                if (!melted.has_column("variable")) {
6852                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'variable' column" << std::endl;
6853                    throw std::runtime_error("pd_test_dataframe_reshape failed: melt variable column");
6854                }
6855                if (!melted.has_column("value")) {
6856                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : melt missing 'value' column" << std::endl;
pivot (pd_test_1_all.cpp:6827)
6817            std::cout << "========= reshaping ========================";
6818
6819            // Test pivot
6820            {
6821                std::map<std::string, std::vector<std::string>> data;
6822                data["date"] = {"2020-01", "2020-01", "2020-02", "2020-02"};
6823                data["city"] = {"NYC", "LA", "NYC", "LA"};
6824                data["temp"] = {"30", "65", "35", "70"};
6825                pandas::DataFrame df(data);
6826
6827                auto pivoted = df.pivot("date", "city", "temp");
6828                if (pivoted.ncols() != 2) {  // LA, NYC (alphabetical)
6829                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : pivot ncols != 2, got " << pivoted.ncols() << std::endl;
6830                    throw std::runtime_error("pd_test_dataframe_reshape failed: pivot ncols");
6831                }
6832                if (pivoted.nrows() != 2) {  // 2020-01, 2020-02
6833                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : pivot nrows != 2, got " << pivoted.nrows() << std::endl;
6834                    throw std::runtime_error("pd_test_dataframe_reshape failed: pivot nrows");
6835                }
6836            }
pivot_table (pd_test_1_all.cpp:25691)
25681namespace dataframe_tests {
25682    namespace dataframe_tests_pivot_table {
25683
25684        bool approx_equal(double a, double b, double tol = 1e-9) {
25685            if (std::isnan(a) && std::isnan(b)) return true;
25686            if (std::isnan(a) || std::isnan(b)) return false;
25687            return std::abs(a - b) < tol;
25688        }
25689
25690        void pd_test_pivot_table_sum() {
25691            std::cout << "========= pivot_table (sum) ================================";
25692
25693            // Create test data: region, product, sales (numeric values for aggregation)
25694            pandas::DataFrame df;
25695            df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696            df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697            df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699            // Pivot with sum aggregation
25700            pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681namespace dataframe_tests {
25682    namespace dataframe_tests_pivot_table {
25683
25684        bool approx_equal(double a, double b, double tol = 1e-9) {
25685            if (std::isnan(a) && std::isnan(b)) return true;
25686            if (std::isnan(a) || std::isnan(b)) return false;
25687            return std::abs(a - b) < tol;
25688        }
25689
25690        void pd_test_pivot_table_sum() {
25691            std::cout << "========= pivot_table (sum) ================================";
25692
25693            // Create test data: region, product, sales (numeric values for aggregation)
25694            pandas::DataFrame df;
25695            df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696            df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697            df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699            // Pivot with sum aggregation
25700            pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681namespace dataframe_tests {
25682    namespace dataframe_tests_pivot_table {
25683
25684        bool approx_equal(double a, double b, double tol = 1e-9) {
25685            if (std::isnan(a) && std::isnan(b)) return true;
25686            if (std::isnan(a) || std::isnan(b)) return false;
25687            return std::abs(a - b) < tol;
25688        }
25689
25690        void pd_test_pivot_table_sum() {
25691            std::cout << "========= pivot_table (sum) ================================";
25692
25693            // Create test data: region, product, sales (numeric values for aggregation)
25694            pandas::DataFrame df;
25695            df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696            df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697            df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699            // Pivot with sum aggregation
25700            pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681namespace dataframe_tests {
25682    namespace dataframe_tests_pivot_table {
25683
25684        bool approx_equal(double a, double b, double tol = 1e-9) {
25685            if (std::isnan(a) && std::isnan(b)) return true;
25686            if (std::isnan(a) || std::isnan(b)) return false;
25687            return std::abs(a - b) < tol;
25688        }
25689
25690        void pd_test_pivot_table_sum() {
25691            std::cout << "========= pivot_table (sum) ================================";
25692
25693            // Create test data: region, product, sales (numeric values for aggregation)
25694            pandas::DataFrame df;
25695            df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696            df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697            df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699            // Pivot with sum aggregation
25700            pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table (pd_test_1_all.cpp:25691)
25681namespace dataframe_tests {
25682    namespace dataframe_tests_pivot_table {
25683
25684        bool approx_equal(double a, double b, double tol = 1e-9) {
25685            if (std::isnan(a) && std::isnan(b)) return true;
25686            if (std::isnan(a) || std::isnan(b)) return false;
25687            return std::abs(a - b) < tol;
25688        }
25689
25690        void pd_test_pivot_table_sum() {
25691            std::cout << "========= pivot_table (sum) ================================";
25692
25693            // Create test data: region, product, sales (numeric values for aggregation)
25694            pandas::DataFrame df;
25695            df.add_column<std::string>("region", {"East", "East", "East", "West", "West"});
25696            df.add_column<std::string>("product", {"A", "A", "B", "A", "B"});
25697            df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 120.0, 80.0});
25698
25699            // Pivot with sum aggregation
25700            pandas::DataFrame result = df.pivot_table("sales", "region", "product", "sum");
pivot_table_multi_agg (pd_test_2_all.cpp:21310)
21300              << dataframe_tests_misc_migration::g_fail << " failed)" << std::endl;
21301
21302    return dataframe_tests_misc_migration::g_fail;
21303}
21304
21305} // namespace dataframe_tests
21306// ------------------- pd_test_misc_migration.cpp (end) -----------------------------
21307
21308// ------------------- pd_test_pivot_ext.cpp (start) -----------------------------
21309// pd_test_pivot_ext.cpp - Tests for pivot_table extensions
21310// Tests pivot_table_multi_agg(), pivot_table_with_grouper(), auto-detect numeric columns
21311
21312#include <iostream>
21313#include <string>
21314#include <vector>
21315#include <cmath>
21316#include <set>
21317
21318#include "../pandas/pd_dataframe.h"
21319#include "../pandas/pd_groupby.h"
pivot_table_with_margins (pd_test_3_all.cpp:7480)
7470    passed = (pivot_fill.nrows() == 2);
7471    if (!passed) {
7472        std::cout << "  [FAIL] : in pd_test_3_all_pivot_table() : fill_value test failed" << std::endl;
7473        throw std::runtime_error("pd_test_3_all_pivot_table failed: fill_value");
7474    }
7475
7476    std::cout << " -> tests passed" << std::endl;
7477}
7478
7479void pd_test_3_all_pivot_table_margins() {
7480    std::cout << "========= DataFrame.pivot_table_with_margins() =======================";
7481
7482    pandas::DataFrame df;
7483    df.add_column<std::string>("region", {"East", "East", "West", "West"});
7484    df.add_column<std::string>("product", {"A", "B", "A", "B"});
7485    df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 250.0});
7486
7487    // Test without margins (should be same as regular pivot_table)
7488    pandas::DataFrame pivot_no_margins = df.pivot_table_with_margins(
7489        "sales", "region", "product", "sum",
7490        std::numeric_limits<double>::quiet_NaN(), false
pivot_table_with_margins (pd_test_3_all.cpp:7480)
7470    passed = (pivot_fill.nrows() == 2);
7471    if (!passed) {
7472        std::cout << "  [FAIL] : in pd_test_3_all_pivot_table() : fill_value test failed" << std::endl;
7473        throw std::runtime_error("pd_test_3_all_pivot_table failed: fill_value");
7474    }
7475
7476    std::cout << " -> tests passed" << std::endl;
7477}
7478
7479void pd_test_3_all_pivot_table_margins() {
7480    std::cout << "========= DataFrame.pivot_table_with_margins() =======================";
7481
7482    pandas::DataFrame df;
7483    df.add_column<std::string>("region", {"East", "East", "West", "West"});
7484    df.add_column<std::string>("product", {"A", "B", "A", "B"});
7485    df.add_column<numpy::float64>("sales", {100.0, 150.0, 200.0, 250.0});
7486
7487    // Test without margins (should be same as regular pivot_table)
7488    pandas::DataFrame pivot_no_margins = df.pivot_table_with_margins(
7489        "sales", "region", "product", "sum",
7490        std::numeric_limits<double>::quiet_NaN(), false
squeeze (pd_test_1_all.cpp:6881)
6871                    throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872                }
6873            }
6874
6875            // Test squeeze
6876            {
6877                std::map<std::string, std::vector<int>> data;
6878                data["A"] = {1};
6879                pandas::DataFrame df(data);
6880
6881                auto squeezed = df.squeeze();
6882                // Should return without error for 1x1 DataFrame
6883            }
6884
6885            // Test stack
6886            {
6887                std::map<std::string, std::vector<int>> data;
6888                data["A"] = {1, 2};
6889                data["B"] = {3, 4};
6890                pandas::DataFrame df(data);
stack (pd_test_1_all.cpp:6892)
6882                // Should return without error for 1x1 DataFrame
6883            }
6884
6885            // Test stack
6886            {
6887                std::map<std::string, std::vector<int>> data;
6888                data["A"] = {1, 2};
6889                data["B"] = {3, 4};
6890                pandas::DataFrame df(data);
6891
6892                auto stacked = df.stack();
6893                // Stack should produce 4 rows (2 rows * 2 columns)
6894                if (stacked.nrows() != 4) {
6895                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : stack nrows != 4, got " << stacked.nrows() << std::endl;
6896                    throw std::runtime_error("pd_test_dataframe_reshape failed: stack nrows");
6897                }
6898            }
6899
6900            std::cout << " -> tests passed" << std::endl;
6901        }
stack_levels (pd_test_3_all.cpp:28695)
28685    };
28686    df.set_columns_levels(lvls, std::vector<std::string>{"l0", "l1"});
28687    return df;
28688}
28689
28690void pd_test_stack_single_level() {
28691    std::cout << "  -- pd_test_stack_single_level --" << std::endl;
28692    int fail = 0;
28693    auto df = make_ml_df();
28694    // Default -> stack innermost level (l1 = x,y) -> remaining columns = A,B
28695    auto r = df.stack_levels({-1}, true);
28696    fail += spt_check(r.ncols() == 2, "ncols==2");
28697    fail += spt_check(r.nrows() == 4, "nrows==4 (2 orig rows * 2 stack vals)");
28698    fail += spt_check(r.has_multiindex(), "multiindex present");
28699    if (fail == 0) std::cout << "    OK" << std::endl;
28700    if (fail != 0) throw std::runtime_error("pd_test_stack_single_level failed");
28701}
28702
28703void pd_test_stack_level_param() {
28704    std::cout << "  -- pd_test_stack_level_param --" << std::endl;
28705    int fail = 0;
swapaxes (pd_test_3_all.cpp:2276)
2266    auto sorted_desc = arr.sort_values(false, "last");
2267    if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268        *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269        throw std::runtime_error("sort_values descending failed");
2270    }
2271
2272    std::cout << " -> tests passed" << std::endl;
2273}
2274
2275void pd_test_3_all_categorical_swapaxes() {
2276    std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279    pandas::CategoricalArray arr(values);
2280
2281    auto result = arr.swapaxes(0, 0);
2282    if (result.size() != 3) {
2283        throw std::runtime_error("swapaxes failed");
2284    }
2285
2286    bool threw = false;
transpose (pd_test_1_all.cpp:16648)
16638                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640            }
16641
16642            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643            if (!passed) {
16644                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646            }
16647
16648            // Test transpose() alias
16649            auto transposed2 = s.transpose();
16650            passed = transposed2.size() == s.size();
16651            if (!passed) {
16652                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653                throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654            }
16655
16656            std::cout << " -> tests passed" << std::endl;
16657        }
unstack (pd_test_3_all.cpp:1739)
1729    }
1730    if (s.size() != 3) {
1731        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case H size" << std::endl;
1732        throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case H size");
1733    }
1734
1735    std::cout << " -> tests passed" << std::endl;
1736}
1737
1738void pd_test_3_all_dataframe_unstack() {
1739    std::cout << "========= DataFrame.unstack() ========================";
1740
1741    std::map<std::string, std::vector<double>> data = {
1742        {"A", {1.0, 2.0, 3.0}},
1743        {"B", {4.0, 5.0, 6.0}}
1744    };
1745    pandas::DataFrame df(data);
1746
1747    // Without MultiIndex, unstack() returns self (matches pandas behavior)
1748    pandas::DataFrame result = df.unstack();
align (pd_test_1_all.cpp:14035)
14025            if (!approx_equal(a_s1, 2.0) || !approx_equal(a_o1, 99.0)) {
14026                passed = false;
14027                std::cout << "  [FAIL] : in pd_test_joining_compare() : difference at row 1 not shown" << std::endl;
14028                throw std::runtime_error("pd_test_joining_compare failed: diff values");
14029            }
14030
14031            std::cout << " -> tests passed" << std::endl;
14032        }
14033
14034        // =====================================================================
14035        // align() Tests
14036        // =====================================================================
14037
14038        void pd_test_joining_align() {
14039            std::cout << "========= align =======================================";
14040
14041            std::map<std::string, std::vector<double>> left_data = {
14042                {"A", {1.0, 2.0}}
14043            };
14044            std::vector<std::string> left_idx = {"x", "y"};
14045            pandas::DataFrame left(left_data, std::make_unique<pandas::Index<std::string>>(left_idx));
align_series (pd_test_2_all.cpp:21152)
21142    std::cout << "  -- test_align_df_series_outer --" << std::endl;
21143
21144    pandas::DataFrame df;
21145    df.add_column("A", std::vector<numpy::float64>{1.0, 2.0});
21146    df.add_column("B", std::vector<numpy::float64>{3.0, 4.0});
21147
21148    pandas::Series<numpy::float64> s({10.0, 20.0}, "vals");
21149    s.set_index(std::make_unique<pandas::Index<std::string>>(
21150        std::vector<std::string>{"B", "C"}));
21151
21152    auto [aligned_df, aligned_s] = df.align_series(s, "outer", 1);
21153
21154    // Outer: columns A, B, C
21155    check(aligned_df.ncols() == 3, "df_ncols_3");
21156    check(aligned_s.size() == 3, "s_size_3");
21157
21158    auto df_cols = aligned_df.columns().to_list();
21159    check(df_cols[0] == "A", "df_col_A");
21160    check(df_cols[1] == "B", "df_col_B");
21161    check(df_cols[2] == "C", "df_col_C");
combine (pd_test_2_all.cpp:1700)
1690        std::cout << "====================================== [OK] pd_test_between_time test suite ========================== " << std::endl;
1691        return 0;
1692    }
1693
1694} // namespace dataframe_tests
1695// ------------------- pd_test_between_time.cpp (end) -----------------------------
1696
1697// ------------------- pd_test_combine.cpp (start) -----------------------------
1698// dataframe_tests/pd_test_combine.cpp
1699// Test for DataFrame.combine() - column-wise combine with another DataFrame
1700
1701#include <iostream>
1702#include <cmath>
1703#include <stdexcept>
1704#include "../pandas/pd_dataframe.h"
1705
1706// CRITICAL: No using namespace directives
1707
1708namespace dataframe_tests {
1709    namespace dataframe_tests_combine {
combine_first (pd_test_1_all.cpp:13889)
13879            if (!approx_equal(b1, 10.0) || !approx_equal(b2, 20.0)) {
13880                passed = false;
13881                std::cout << "  [FAIL] : in pd_test_joining_join_index() : matched rows wrong" << std::endl;
13882                throw std::runtime_error("pd_test_joining_join_index failed: match values");
13883            }
13884
13885            std::cout << " -> tests passed" << std::endl;
13886        }
13887
13888        // =====================================================================
13889        // combine_first() Tests
13890        // =====================================================================
13891
13892        void pd_test_joining_combine_first() {
13893            std::cout << "========= combine_first ===============================";
13894
13895            std::map<std::string, std::vector<double>> left_data = {
13896                {"A", {1.0, std::nan(""), 3.0}},
13897                {"B", {std::nan(""), 5.0, std::nan("")}}
13898            };
13899            std::vector<std::string> left_idx = {"x", "y", "z"};
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
join (pd_test_1_all.cpp:12353)
12343            std::cout << " -> tests passed" << std::endl;
12344        }
12345
12346        void pd_test_index_join() {
12347            std::cout << "========= join ========================================";
12348
12349            pandas::Index<numpy::int64> idx1{1, 2, 3};
12350            pandas::Index<numpy::int64> idx2{2, 3, 4};
12351
12352            auto [inner_joined, left_idx, right_idx] = idx1.join(idx2, "inner");
12353            bool passed = (inner_joined.size() == 2);  // {2, 3}
12354
12355            auto [outer_joined, ol_idx, or_idx] = idx1.join(idx2, "outer");
12356            passed = passed && (outer_joined.size() == 4);  // {1, 2, 3, 4}
12357
12358            if (!passed) {
12359                std::cout << "  [FAIL] : in pd_test_index_join() : join failed" << std::endl;
12360                throw std::runtime_error("pd_test_index_join failed");
12361            }
merge (pd_test_1_all.cpp:13639)
13629namespace dataframe_tests {
13630    namespace dataframe_tests_joining {
13631
13632        // Helper to check approximate equality
13633        bool approx_equal(double a, double b, double tol = 1e-9) {
13634            if (std::isnan(a) && std::isnan(b)) return true;
13635            return std::abs(a - b) < tol;
13636        }
13637
13638        // =====================================================================
13639        // merge() Tests
13640        // =====================================================================
13641
13642        void pd_test_joining_merge_inner() {
13643            std::cout << "========= merge inner join ============================";
13644
13645            // Left DataFrame: id, value_left
13646            std::map<std::string, std::vector<double>> left_data = {
13647                {"id", {1.0, 2.0, 3.0, 4.0}},
13648                {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649            };
merge (pd_test_1_all.cpp:13639)
13629namespace dataframe_tests {
13630    namespace dataframe_tests_joining {
13631
13632        // Helper to check approximate equality
13633        bool approx_equal(double a, double b, double tol = 1e-9) {
13634            if (std::isnan(a) && std::isnan(b)) return true;
13635            return std::abs(a - b) < tol;
13636        }
13637
13638        // =====================================================================
13639        // merge() Tests
13640        // =====================================================================
13641
13642        void pd_test_joining_merge_inner() {
13643            std::cout << "========= merge inner join ============================";
13644
13645            // Left DataFrame: id, value_left
13646            std::map<std::string, std::vector<double>> left_data = {
13647                {"id", {1.0, 2.0, 3.0, 4.0}},
13648                {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649            };
merge (pd_test_1_all.cpp:13639)
13629namespace dataframe_tests {
13630    namespace dataframe_tests_joining {
13631
13632        // Helper to check approximate equality
13633        bool approx_equal(double a, double b, double tol = 1e-9) {
13634            if (std::isnan(a) && std::isnan(b)) return true;
13635            return std::abs(a - b) < tol;
13636        }
13637
13638        // =====================================================================
13639        // merge() Tests
13640        // =====================================================================
13641
13642        void pd_test_joining_merge_inner() {
13643            std::cout << "========= merge inner join ============================";
13644
13645            // Left DataFrame: id, value_left
13646            std::map<std::string, std::vector<double>> left_data = {
13647                {"id", {1.0, 2.0, 3.0, 4.0}},
13648                {"value_left", {10.0, 20.0, 30.0, 40.0}}
13649            };
asfreq (pd_test_1_all.cpp:2869)
2859        std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861        // Monthly to quarterly
2862        pandas::PeriodArray arr_m(std::vector<std::string>{
2863            "2024-01",
2864            "2024-04",
2865            "2024-07",
2866            "NaT"
2867        }, "M");
2868
2869        auto arr_q = arr_m.asfreq("Q");
2870        if (arr_q.size() != 4) {
2871            std::cout << "  [FAIL] : asfreq size should be 4" << std::endl;
2872            throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873        }
2874        if (arr_q.freqstr() != "Q") {
2875            std::cout << "  [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876            throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877        }
2878
2879        // Check NaT is preserved
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
between_time (pd_test_2_all.cpp:1154)
1144        std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145        return 0;
1146    }
1147
1148} // namespace dataframe_tests
1149// ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151// ------------------- pd_test_between_time.cpp (start) -----------------------------
1152// dataframe_tests/pd_test_between_time.cpp
1153// Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154// Selects values between particular times of day from datetime-indexed DataFrame
1155#include <iostream>
1156#include <stdexcept>
1157#include <vector>
1158#include <string>
1159#include <map>
1160#include "../pandas/pd_dataframe.h"
1161
1162// CRITICAL: No using namespace directives
between_time (pd_test_2_all.cpp:1154)
1144        std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145        return 0;
1146    }
1147
1148} // namespace dataframe_tests
1149// ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151// ------------------- pd_test_between_time.cpp (start) -----------------------------
1152// dataframe_tests/pd_test_between_time.cpp
1153// Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154// Selects values between particular times of day from datetime-indexed DataFrame
1155#include <iostream>
1156#include <stdexcept>
1157#include <vector>
1158#include <string>
1159#include <map>
1160#include "../pandas/pd_dataframe.h"
1161
1162// CRITICAL: No using namespace directives
diff (pd_test_1_all.cpp:5171)
5161        }
5162
5163        void pd_test_arithmetic_dataframe_diff_shift() {
5164            std::cout << "========= DataFrame diff/shift ==================";
5165
5166            std::map<std::string, std::vector<double>> data;
5167            data["A"] = {1.0, 3.0, 6.0, 10.0};
5168            pandas::DataFrame df(data);
5169
5170            // diff: [NaN, 2, 3, 4]
5171            auto d = df.diff();
5172            std::string val = d["A"].get_value_str(1);
5173            bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174            if (!passed) {
5175                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177            }
5178
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
pct_change (pd_test_1_all.cpp:4621)
4611                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612            }
4613
4614            std::cout << " -> tests passed" << std::endl;
4615        }
4616
4617        void pd_test_aggregation_series_pct_change() {
4618            std::cout << "========= Series pct_change =====================";
4619
4620            pandas::Series<double> s({100.0, 110.0, 121.0});
4621            auto pct = s.pct_change();
4622
4623            // First element should be NaN
4624            bool passed = std::isnan(pct[0]);
4625            if (!passed) {
4626                std::cout << "  [FAIL] : in pd_test_aggregation_series_pct_change() : first element should be NaN" << std::endl;
4627                throw std::runtime_error("pd_test_aggregation_series_pct_change failed: first element should be NaN");
4628            }
4629
4630            // Second element should be 0.1 (10% increase)
4631            passed = std::abs(pct[1] - 0.1) < 0.001;
shift (pd_test_1_all.cpp:5188)
5178            // First element should be NaN
5179            val = d["A"].get_value_str(0);
5180            passed = std::isnan(std::stod(val));
5181            if (!passed) {
5182                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184            }
5185
5186            // shift: [NaN, 1, 3, 6]
5187            auto s = df.shift();
5188            val = s["A"].get_value_str(1);
5189            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190            if (!passed) {
5191                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193            }
5194
5195            std::cout << " -> tests passed" << std::endl;
5196        }
to_period (pd_test_2_all.cpp:14554)
14544        std::cout << "====================================== [OK] pd_test_to_parquet test suite ========================" << std::endl;
14545        return 0;
14546    }
14547
14548} // namespace dataframe_tests
14549// ------------------- pd_test_to_parquet.cpp (end) -----------------------------
14550
14551// ------------------- pd_test_to_period.cpp (start) -----------------------------
14552// dataframe_tests/pd_test_to_period.cpp
14553// Test suite for DataFrame.to_period() method
14554
14555#include <iostream>
14556#include <stdexcept>
14557#include <vector>
14558#include <string>
14559#include <map>
14560
14561#include "../pandas/pd_dataframe.h"
14562
14563// CRITICAL: No using namespace directives
to_timestamp (pd_test_1_all.cpp:2830)
2820    void pd_test_period_array_to_timestamp() {
2821        std::cout << "========= PeriodArray: to_timestamp ======================= ";
2822
2823        pandas::PeriodArray arr(std::vector<std::string>{
2824            "2024-01",
2825            "2024-06",
2826            "NaT"
2827        }, "M");
2828
2829        // to_timestamp with start
2830        auto ts_start = arr.to_timestamp("start");
2831        if (ts_start.size() != 3) {
2832            std::cout << "  [FAIL] : to_timestamp size should be 3" << std::endl;
2833            throw std::runtime_error("pd_test_period_array_to_timestamp failed: size");
2834        }
2835
2836        auto ts0 = ts_start[0];
2837        if (!ts0.has_value()) {
2838            std::cout << "  [FAIL] : ts_start[0] should have value" << std::endl;
2839            throw std::runtime_error("pd_test_period_array_to_timestamp failed: ts_start[0]");
2840        }
tz_convert (pd_test_2_all.cpp:17874)
17864        std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865        return 0;
17866    }
17867
17868} // namespace dataframe_tests
17869// ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871// ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872// dataframe_tests/pd_test_tz_convert.cpp
17873// Test for DataFrame.tz_convert() method
17874
17875#include <iostream>
17876#include <stdexcept>
17877#include <cmath>
17878#include "../pandas/pd_dataframe.h"
17879
17880namespace dataframe_tests {
17881    namespace dataframe_tests_tz_convert {
17882
17883        void pd_test_tz_convert_basic() {
tz_localize (pd_test_1_all.cpp:1431)
1421            "2023-06-15"
1422        });
1423
1424        // Initially should be timezone-naive
1425        if (arr.is_tz_aware()) {
1426            std::cout << "  [FAIL] : array should be timezone-naive initially" << std::endl;
1427            throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428        }
1429
1430        // Localize to UTC
1431        auto localized = arr.tz_localize("UTC");
1432        if (!localized.is_tz_aware()) {
1433            std::cout << "  [FAIL] : localized array should be timezone-aware" << std::endl;
1434            throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435        }
1436
1437        // Verify timezone name in dtype
1438        auto dt = localized.dtype();
1439        if (!dt.is_tz_aware()) {
1440            std::cout << "  [FAIL] : dtype should be timezone-aware" << std::endl;
1441            throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
to_csv (pd_test_1_all.cpp:6967)
6957        void pd_test_dataframe_io() {
6958            std::cout << "========= I/O methods ======================";
6959
6960            std::map<std::string, std::vector<numpy::int64>> data;
6961            data["A"] = {1, 2, 3};
6962            data["B"] = {4, 5, 6};
6963
6964            pandas::DataFrame df(data);
6965
6966            // Test to_csv
6967            std::string csv = df.to_csv(false);
6968            if (csv.empty()) {
6969                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970                throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971            }
6972            if (csv.find("A") == std::string::npos) {
6973                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974                throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975            }
6976
6977            // Test to_json
to_gbq (pd_test_3_all.cpp:3012)
3002        throw std::runtime_error("to_xml() missing root element");
3003    }
3004    if (xml_output.find("<row>") == std::string::npos) {
3005        throw std::runtime_error("to_xml() missing row elements");
3006    }
3007
3008    std::cout << " -> tests passed" << std::endl;
3009}
3010
3011void pd_test_3_all_df_to_gbq() {
3012    std::cout << "========= DataFrame.to_gbq() =============================";
3013
3014    std::map<std::string, std::vector<double>> data = {
3015        {"A", {1.0, 2.0, 3.0}},
3016        {"B", {4.0, 5.0, 6.0}}
3017    };
3018    pandas::DataFrame df(data);
3019
3020    // Test to_gbq - should throw (not implemented)
3021    bool threw = false;
3022    try {
to_json (pd_test_1_all.cpp:6978)
6968            if (csv.empty()) {
6969                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970                throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971            }
6972            if (csv.find("A") == std::string::npos) {
6973                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974                throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975            }
6976
6977            // Test to_json
6978            std::string json = df.to_json("columns");
6979            if (json.empty()) {
6980                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_json empty" << std::endl;
6981                throw std::runtime_error("pd_test_dataframe_io failed: to_json empty");
6982            }
6983            if (json.find("{") == std::string::npos) {
6984                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_json not JSON" << std::endl;
6985                throw std::runtime_error("pd_test_dataframe_io failed: to_json not JSON");
6986            }
6987
6988            // Test to_string
to_xml (pd_test_3_all.cpp:2983)
2973    }
2974
2975    std::cout << " -> tests passed" << std::endl;
2976}
2977
2978// ============================================================================
2979// Category 16: DataFrame Plan 2 - Export Functions
2980// ============================================================================
2981
2982void pd_test_3_all_df_to_xml() {
2983    std::cout << "========= DataFrame.to_xml() =============================";
2984
2985    std::map<std::string, std::vector<double>> data = {
2986        {"A", {1.0, 2.0, 3.0}},
2987        {"B", {4.0, 5.0, 6.0}}
2988    };
2989    pandas::DataFrame df(data);
2990
2991    // Test to_xml to string
2992    std::string xml_output = df.to_xml();
2993    if (xml_output.empty()) {
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
bool_ (pd_test_1_all.cpp:9120)
9110void pd_test_datetime_mixin_array_constructor() {
9111    std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113    // Create DatetimeArray with some values
9114    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9116    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9117    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9118
9119    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120    mask.setElementAt({0}, numpy::bool_(false));
9121    mask.setElementAt({1}, numpy::bool_(false));
9122    mask.setElementAt({2}, numpy::bool_(false));
9123
9124    pandas::DatetimeArray arr(data, mask);
9125    pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127    bool passed = (idx.size() == 3 && !idx.empty() &&
9128                   idx.name().has_value() && *idx.name() == "timestamps" &&
9129                   idx.inferred_type() == "datetime");
9130    if (!passed) {
convert_dtypes (pd_test_1_all.cpp:27317)
27307        void pd_test_convert_dtypes_integer_strings() {
27308            std::cout << "========= convert_dtypes: integer strings ============";
27309
27310            // Create DataFrame with string column containing integers
27311            std::map<std::string, std::vector<std::string>> data;
27312            data["a"] = {"1", "2", "3", "4", "5"};
27313
27314            pandas::DataFrame df(data);
27315
27316            // Convert dtypes
27317            pandas::DataFrame converted = df.convert_dtypes();
27318
27319            // After conversion, the dtype should be Int64 (nullable integer); plain int64 is also accepted
27320            pandas::Series<std::string> dtypes_after = converted.dtypes();
27321            std::string dtype_a = dtypes_after[static_cast<size_t>(0)];
27322
27323            // Verify the dtype was converted to Int64 (nullable integer, per pandas convert_dtypes behavior)
27324            bool passed = (dtype_a == "Int64" || dtype_a == "int64");
27325            if (!passed) {
27326                std::cout << "  [FAIL] : in pd_test_convert_dtypes_integer_strings() : expected Int64, got " << dtype_a << std::endl;
27327                throw std::runtime_error("pd_test_convert_dtypes_integer_strings failed: dtype mismatch");
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
infer_objects (pd_test_1_all.cpp:27595)
27585            // Create DataFrame with string column containing integers
27586            std::map<std::string, std::vector<std::string>> data;
27587            data["A"] = {"1", "2", "3", "4", "5"};
27588
27589            pandas::DataFrame df(data);
27590
27591            // Before inference, dtype should be string/object
27592            std::string before_dtype = df["A"].dtype_name();
27593
27594            // Apply infer_objects
27595            pandas::DataFrame result = df.infer_objects();
27596
27597            // After inference, dtype should be int64
27598            std::string after_dtype = result["A"].dtype_name();
27599
27600            bool passed = (after_dtype == "int64");
27601            if (!passed) {
27602                std::cout << "  [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603                throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604            }
begin (pd_test_1_all.cpp:457)
447        };
448        pandas::CategoricalArray arr(values);
449
450        const std::vector<std::string>& cats = arr.categories();
451        if (cats.size() != 3) {
452            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454        }
455
456        // Categories should be unique
457        std::set<std::string> unique_cats(cats.begin(), cats.end());
458        if (unique_cats.size() != cats.size()) {
459            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461        }
462
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
begin (pd_test_1_all.cpp:457)
447        };
448        pandas::CategoricalArray arr(values);
449
450        const std::vector<std::string>& cats = arr.categories();
451        if (cats.size() != 3) {
452            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454        }
455
456        // Categories should be unique
457        std::set<std::string> unique_cats(cats.begin(), cats.end());
458        if (unique_cats.size() != cats.size()) {
459            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461        }
462
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
end (pd_test_1_all.cpp:457)
447        };
448        pandas::CategoricalArray arr(values);
449
450        const std::vector<std::string>& cats = arr.categories();
451        if (cats.size() != 3) {
452            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454        }
455
456        // Categories should be unique
457        std::set<std::string> unique_cats(cats.begin(), cats.end());
458        if (unique_cats.size() != cats.size()) {
459            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461        }
462
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
end (pd_test_1_all.cpp:457)
447        };
448        pandas::CategoricalArray arr(values);
449
450        const std::vector<std::string>& cats = arr.categories();
451        if (cats.size() != 3) {
452            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories size != 3" << std::endl;
453            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories size != 3");
454        }
455
456        // Categories should be unique
457        std::set<std::string> unique_cats(cats.begin(), cats.end());
458        if (unique_cats.size() != cats.size()) {
459            std::cout << "  [FAIL] : in pd_test_categorical_array_categories_property() : categories not unique" << std::endl;
460            throw std::runtime_error("pd_test_categorical_array_categories_property failed: categories not unique");
461        }
462
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
items (pd_test_1_all.cpp:16554)
16544        // =====================================================================
16545        // Iteration Tests (items, keys)
16546        // =====================================================================
16547
16548        void pd_test_ndframe_items_keys() {
16549            std::cout << "========= items/keys ===========================================" << std::endl;
16550
16551            pandas::Series<int> s({10, 20, 30});
16552
16553            // Test items()
16554            std::vector<std::string> collected_keys;
16555            std::vector<int> collected_values;
16556
16557            s.items([&](const std::string& key, int value) {
16558                collected_keys.push_back(key);
16559                collected_values.push_back(value);
16560            });
16561
16562            bool passed = collected_keys.size() == 3;
16563            if (!passed) {
iterrows (pd_test_2_all.cpp:6498)
6488                    std::cout << "  [FAIL] : in pd_test_iter_items_content_access() : dtype expected int, got "
6489                              << dtype << std::endl;
6490                    throw std::runtime_error("pd_test_iter_items_content_access failed: dtype");
6491                }
6492            }
6493
6494            std::cout << " -> tests passed" << std::endl;
6495        }
6496
6497        // =========================================================================
6498        // iterrows() tests
6499        // =========================================================================
6500
6501        void pd_test_iter_iterrows_basic() {
6502            std::cout << "========= iterrows() basic iteration =============";
6503
6504            // Create a simple DataFrame
6505            std::map<std::string, std::vector<std::string>> data = {
6506                {"A", {"a", "b", "c"}},
6507                {"B", {"1", "2", "3"}},
6508                {"C", {"x", "y", "z"}}
itertuples (pd_test_2_all.cpp:6774)
6764            passed = (it == end);
6765            if (!passed) {
6766                std::cout << "  [FAIL] : in pd_test_iter_iterrows_iterator_methods() : didn't reach end" << std::endl;
6767                throw std::runtime_error("pd_test_iter_iterrows_iterator_methods failed: didn't reach end");
6768            }
6769
6770            std::cout << " -> tests passed" << std::endl;
6771        }
6772
6773        // =========================================================================
6774        // itertuples() tests
6775        // =========================================================================
6776
6777        void pd_test_iter_itertuples_basic() {
6778            std::cout << "========= itertuples() basic iteration ===========";
6779
6780            // Create a DataFrame similar to pandas example
6781            std::map<std::string, std::vector<int>> data = {
6782                {"num_legs", {4, 2}},
6783                {"num_wings", {0, 2}}
6784            };
keys (pd_test_1_all.cpp:16319)
16309            }
16310
16311            // Test default value
16312            passed = attrs.get<int>("missing", 99) == 99;
16313            if (!passed) {
16314                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : default value" << std::endl;
16315                throw std::runtime_error("pd_test_ndframe_attrs failed: default value");
16316            }
16317
16318            // Test keys
16319            auto keys = attrs.keys();
16320            passed = keys.size() == 3;
16321            if (!passed) {
16322                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : keys()" << std::endl;
16323                throw std::runtime_error("pd_test_ndframe_attrs failed: keys()");
16324            }
16325
16326            // Test remove
16327            passed = attrs.remove("count") && !attrs.contains("count");
16328            if (!passed) {
16329                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : remove" << std::endl;
duplicated (pd_test_1_all.cpp:10583)
10573    std::cout << " -> tests passed" << std::endl;
10574}
10575
10576void pd_test_extension_index_duplicated() {
10577    std::cout << "========= duplicated =========================";
10578
10579    pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10580    pandas::CategoricalIndex idx(arr);
10581
10582    auto dup_mask = idx.duplicated("first");
10583
10584    bool passed = (dup_mask.getElementAt({0}) == false &&
10585                   dup_mask.getElementAt({1}) == false &&
10586                   dup_mask.getElementAt({2}) == true &&
10587                   dup_mask.getElementAt({3}) == false &&
10588                   dup_mask.getElementAt({4}) == true);
10589    if (!passed) {
10590        std::cout << "  [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10591        throw std::runtime_error("pd_test_extension_index_duplicated failed");
10592    }
isin (pd_test_1_all.cpp:5938)
5928    std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932    std::cout << "========= inherited isin ==============================";
5933
5934    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935    pandas::CategoricalIndex idx(arr);
5936
5937    std::vector<std::string> values = {"a", "c"};
5938    numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940    bool passed = (mask.getSize() == 4 &&
5941                   mask.getElementAt({0}) == true &&   // a
5942                   mask.getElementAt({1}) == false &&  // b
5943                   mask.getElementAt({2}) == true &&   // c
5944                   mask.getElementAt({3}) == false);   // d
5945    if (!passed) {
5946        std::cout << "  [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947        throw std::runtime_error("pd_test_categorical_index_isin failed");
5948    }
isin (pd_test_1_all.cpp:5938)
5928    std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932    std::cout << "========= inherited isin ==============================";
5933
5934    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935    pandas::CategoricalIndex idx(arr);
5936
5937    std::vector<std::string> values = {"a", "c"};
5938    numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940    bool passed = (mask.getSize() == 4 &&
5941                   mask.getElementAt({0}) == true &&   // a
5942                   mask.getElementAt({1}) == false &&  // b
5943                   mask.getElementAt({2}) == true &&   // c
5944                   mask.getElementAt({3}) == false);   // d
5945    if (!passed) {
5946        std::cout << "  [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947        throw std::runtime_error("pd_test_categorical_index_isin failed");
5948    }
is_na_at (pd_test_5_all.cpp:35205)
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
35208}
35209
35210void bool_nullable_826495_case_5_fillna_preserves_dtype(int& local_fail) {
35211    pandas::DataFrame df;
35212    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35213    pandas_tests::check(df["X"].dtype_name() == "boolean", "case_5.pre_dtype", local_fail);
35214    auto df_filled = df.fillna(1.0);
35215    pandas_tests::check(df_filled["X"].dtype_name() == "boolean",
is_numeric_dtype (pd_test_2_all.cpp:19939)
19929    check(labels2[0] == "33.3%", "pct_label_33_3");
19930}
19931
19932// =====================================================================
19933// Test: is_numeric_dtype covers all expected types
19934// =====================================================================
19935
19936void pd_test_describe_is_numeric_dtype() {
19937    std::cout << "  -- pd_test_describe_is_numeric_dtype --" << std::endl;
19938
19939    check(pandas::DataFrame::is_numeric_dtype("int64"), "is_numeric_int64");
19940    check(pandas::DataFrame::is_numeric_dtype("Int64"), "is_numeric_Int64");
19941    check(pandas::DataFrame::is_numeric_dtype("float64"), "is_numeric_float64");
19942    check(pandas::DataFrame::is_numeric_dtype("Float64"), "is_numeric_Float64");
19943    check(pandas::DataFrame::is_numeric_dtype("uint8"), "is_numeric_uint8");
19944    check(pandas::DataFrame::is_numeric_dtype("UInt32"), "is_numeric_UInt32");
19945    check(!pandas::DataFrame::is_numeric_dtype("object"), "not_numeric_object");
19946    check(!pandas::DataFrame::is_numeric_dtype("string"), "not_numeric_string");
19947    check(!pandas::DataFrame::is_numeric_dtype("bool"), "not_numeric_bool");
19948    check(!pandas::DataFrame::is_numeric_dtype("datetime64[ns]"), "not_numeric_datetime");
19949}
abs (pd_test_1_all.cpp:283)
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
all (pd_test_1_all.cpp:247)
237        pandas::BooleanArray has_true({
238            std::optional<bool>(false),
239            std::optional<bool>(true)
240        });
241        any_result = has_true.any();
242        if (!any_result.has_value() || !any_result.value()) {
243            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() with True" << std::endl;
244            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() with True");
245        }
246
247        // Test all()
248        pandas::BooleanArray all_true({
249            std::optional<bool>(true),
250            std::optional<bool>(true)
251        });
252        auto all_result = all_true.all();
253        if (!all_result.has_value() || !all_result.value()) {
254            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : all() of all True" << std::endl;
255            throw std::runtime_error("pd_test_boolean_array_reductions failed: all() all True");
256        }
all_axis (pd_test_1_all.cpp:22302)
22292        std::cout << "====================================== [OK] pd_test_where_mask test suite ========================== " << std::endl;
22293        return 0;
22294    }
22295
22296} // namespace dataframe_tests
22297// ------------------- pd_test_where_mask.cpp (end) -----------------------------
22298
22299// ------------------- pd_test_all_any.cpp (start) -----------------------------
22300// dataframe_tests/pd_test_all_any.cpp
22301// Tests for DataFrame.all_axis() and DataFrame.any_axis() methods
22302
22303#include <iostream>
22304#include <stdexcept>
22305#include <cmath>
22306#include <limits>
22307#include "../pandas/pd_dataframe.h"
22308
22309// CRITICAL: No using namespace directives
22310
22311namespace dataframe_tests {
all_columns_timedelta (pd_test_5_all.cpp:64597)
64587    pandas_tests::check(!threw, "case_35_four_col.did_not_throw", local_fail);
64588    if (threw) return;
64589    pandas_tests::check(min_dtype == "int32",
64590        "case_35_four_col.min_dtype_is_int32_got_" + min_dtype, local_fail);
64591    pandas_tests::check(max_dtype == "int32",
64592        "case_35_four_col.max_dtype_is_int32_got_" + max_dtype, local_fail);
64593}
64594
64595void f_df_min_max_axis1_dtype_9_4287513_case_36_timedelta_only_regression(int& local_fail) {
64596    std::cout << "-- f_df_min_max_axis1_dtype_9_4287513_case_36_timedelta_only_regression\n";
64597    // The all_columns_timedelta() branch is preserved by plan_38 — must
64598    // remain identical pre-fix and post-fix.
64599    pandas::DataFrame df;
64600    df.add_column<pandas::Timedelta>("a", {
64601        pandas::Timedelta("1 days"),
64602        pandas::Timedelta("2 days"),
64603        pandas::Timedelta("3 days")});
64604    df.add_column<pandas::Timedelta>("b", {
64605        pandas::Timedelta("4 days"),
64606        pandas::Timedelta("5 days"),
64607        pandas::Timedelta("6 days")});
any (pd_test_1_all.cpp:226)
216            std::cout << "  [FAIL] : in pd_test_boolean_array_kleene_not() : ~NA should be NA" << std::endl;
217            throw std::runtime_error("pd_test_boolean_array_kleene_not failed: ~NA");
218        }
219
220        std::cout << " -> tests passed" << std::endl;
221    }
222
223    void pd_test_boolean_array_reductions() {
224        std::cout << "========= BooleanArray: reductions ======================= ";
225
226        // Test any()
227        pandas::BooleanArray all_false({
228            std::optional<bool>(false),
229            std::optional<bool>(false)
230        });
231        auto any_result = all_false.any();
232        if (!any_result.has_value() || any_result.value()) {
233            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : any() of all False" << std::endl;
234            throw std::runtime_error("pd_test_boolean_array_reductions failed: any() all False");
235        }
any_axis (pd_test_1_all.cpp:22302)
22292        std::cout << "====================================== [OK] pd_test_where_mask test suite ========================== " << std::endl;
22293        return 0;
22294    }
22295
22296} // namespace dataframe_tests
22297// ------------------- pd_test_where_mask.cpp (end) -----------------------------
22298
22299// ------------------- pd_test_all_any.cpp (start) -----------------------------
22300// dataframe_tests/pd_test_all_any.cpp
22301// Tests for DataFrame.all_axis() and DataFrame.any_axis() methods
22302
22303#include <iostream>
22304#include <stdexcept>
22305#include <cmath>
22306#include <limits>
22307#include "../pandas/pd_dataframe.h"
22308
22309// CRITICAL: No using namespace directives
22310
22311namespace dataframe_tests {
arr (pd_test_1_all.cpp:45)
35            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : initializer_list size != 4" << std::endl;
36            throw std::runtime_error("pd_test_boolean_array_constructors failed: initializer_list size != 4");
37        }
38
39        std::cout << " -> tests passed" << std::endl;
40    }
41
42    void pd_test_boolean_array_na_handling() {
43        std::cout << "========= BooleanArray: NA handling ======================= ";
44
45        pandas::BooleanArray arr({
46            std::optional<bool>(true),
47            std::nullopt,  // NA at index 1
48            std::optional<bool>(false)
49        });
50
51        if (!arr.is_na(1)) {
52            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(1) should be true" << std::endl;
53            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(1) should be true");
54        }
assemble_multi_series_dataframe (pd_test_3_all.cpp:29020)
29010}
29011
29012void pd_test_df_apply_assembly_assemble_multi_series_df() {
29013    std::cout << "  -- pd_test_df_apply_assembly_assemble_multi_series_df --" << std::endl;
29014    int fail = 0;
29015    auto df = make_numeric_df();
29016    std::vector<std::unique_ptr<pandas::NDFrameBase>> cols;
29017    cols.push_back(df.column_to_typed_series(0));
29018    cols.push_back(df.column_to_typed_series(1));
29019    cols.push_back(df.column_to_typed_series(2));
29020    auto out = pandas::DataFrame::assemble_multi_series_dataframe(
29021        std::move(cols), {"A", "B", "C"}, df.index().clone());
29022    fail += dfa_check(out.nrows() == 3, "nrows == 3");
29023    fail += dfa_check(out.ncols() == 3, "ncols == 3");
29024    fail += dfa_check(out.columns().get_value_str(0) == "A", "col 0 == A");
29025    fail += dfa_check(out.columns().get_value_str(2) == "C", "col 2 == C");
29026    fail += dfa_check(out["A"].get_value_double(1) == 2.0, "A[1] == 2.0");
29027    fail += dfa_check(out["C"].get_value_double(2) == 9.0, "C[2] == 9.0");
29028    if (fail == 0) std::cout << "    OK" << std::endl;
29029    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_multi_series_df failed");
29030}
assemble_scalar_series (pd_test_3_all.cpp:28979)
28969        fail += dfa_check(ss->cat_ordered() == true, "cat_ordered preserved");
28970    }
28971    if (fail == 0) std::cout << "    OK" << std::endl;
28972    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_column_to_typed_series_category failed");
28973}
28974
28975void pd_test_df_apply_assembly_assemble_scalar_series_int() {
28976    std::cout << "  -- pd_test_df_apply_assembly_assemble_scalar_series_int --" << std::endl;
28977    int fail = 0;
28978    auto df = make_numeric_df();
28979    auto s = df.assemble_scalar_series({1.0, 2.0, 3.0}, {"A", "B", "C"});
28980    fail += dfa_check(s.size() == 3, "size == 3");
28981    fail += dfa_check(s.dtype_name() == "int64", "dtype int64 from whole-number detection");
28982    fail += dfa_check(s.index().size() == 3, "index size == 3");
28983    fail += dfa_check(s.index().get_value_str(0) == "A", "index[0] == A");
28984    if (fail == 0) std::cout << "    OK" << std::endl;
28985    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_series_int failed");
28986}
28987
28988void pd_test_df_apply_assembly_assemble_scalar_series_float() {
28989    std::cout << "  -- pd_test_df_apply_assembly_assemble_scalar_series_float --" << std::endl;
assemble_scalar_string_series (pd_test_3_all.cpp:29004)
28994    fail += dfa_check(s.dtype_name() == "float64", "dtype float64");
28995    fail += dfa_check(s.index().get_value_str(1) == "B", "index[1] == B");
28996    if (fail == 0) std::cout << "    OK" << std::endl;
28997    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_series_float failed");
28998}
28999
29000void pd_test_df_apply_assembly_assemble_scalar_string_series() {
29001    std::cout << "  -- pd_test_df_apply_assembly_assemble_scalar_string_series --" << std::endl;
29002    int fail = 0;
29003    auto df = make_numeric_df();
29004    auto s = df.assemble_scalar_string_series({"foo", "bar", "baz"}, {"A", "B", "C"});
29005    fail += dfa_check(s.size() == 3, "size == 3");
29006    fail += dfa_check(s[0] == "foo" && s[2] == "baz", "values");
29007    fail += dfa_check(s.index().get_value_str(2) == "C", "index[2] == C");
29008    if (fail == 0) std::cout << "    OK" << std::endl;
29009    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_assemble_scalar_string_series failed");
29010}
29011
29012void pd_test_df_apply_assembly_assemble_multi_series_df() {
29013    std::cout << "  -- pd_test_df_apply_assembly_assemble_multi_series_df --" << std::endl;
29014    int fail = 0;
axes (pd_test_1_all.cpp:16602)
16592        // =====================================================================
16593        // Axes Tests
16594        // =====================================================================
16595
16596        void pd_test_ndframe_axes() {
16597            std::cout << "========= axes =================================================" << std::endl;
16598
16599            pandas::Series<double> s({1.0, 2.0, 3.0});
16600
16601            auto axes = s.axes();
16602
16603            bool passed = axes.size() == 1;
16604            if (!passed) {
16605                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axes count" << std::endl;
16606                throw std::runtime_error("pd_test_ndframe_axes failed: axes count");
16607            }
16608
16609            passed = axes[0]->size() == 3;
16610            if (!passed) {
16611                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axis size" << std::endl;
broadcast_series_to_bool_dataframe (pd_test_3_all.cpp:28228)
28218            r.iat<double>(0, b) != -1.0 || r.iat<double>(1, b) != 4.0) {
28219            throw std::runtime_error("pd_test_where_df_condition failed");
28220        }
28221        std::cout << "PASSED" << std::endl;
28222    }
28223
28224    void pd_test_where_series_condition_axis0() {
28225        std::cout << "  pd_test_where_series_condition_axis0... ";
28226        auto df = make_df_2x2();
28227        pandas::Series<numpy::float64> s({1.0, 0.0});
28228        auto cond = df.broadcast_series_to_bool_dataframe(s, 0);
28229        auto r = df.where(cond, -1.0);
28230        size_t a = df.get_column_index("A");
28231        size_t b = df.get_column_index("B");
28232        if (r.iat<double>(0, a) != 1.0 || r.iat<double>(1, a) != -1.0 ||
28233            r.iat<double>(0, b) != 3.0 || r.iat<double>(1, b) != -1.0) {
28234            throw std::runtime_error("pd_test_where_series_condition_axis0 failed");
28235        }
28236        std::cout << "PASSED" << std::endl;
28237    }
cbegin (pd_test_2_all.cpp:6185)
6175            std::cout << "========= const iteration ========================";
6176
6177            std::map<std::string, std::vector<int>> data = {
6178                {"Col1", {1}},
6179                {"Col2", {2}}
6180            };
6181            const pandas::DataFrame df(data);
6182
6183            // Test const iteration
6184            std::vector<std::string> cols;
6185            for (auto it = df.cbegin(); it != df.cend(); ++it) {
6186                cols.push_back(*it);
6187            }
6188
6189            bool passed = (cols.size() == 2);
6190            if (!passed) {
6191                std::cout << "  [FAIL] : in pd_test_iter_const_iteration() : expected 2 columns" << std::endl;
6192                throw std::runtime_error("pd_test_iter_const_iteration failed: size mismatch");
6193            }
6194
6195            std::cout << " -> tests passed" << std::endl;
ceil (pd_test_1_all.cpp:4949)
4939                throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940            }
4941
4942            auto f = a.floor();
4943            passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944            if (!passed) {
4945                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946                throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947            }
4948
4949            auto c = a.ceil();
4950            passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951            if (!passed) {
4952                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
4953                throw std::runtime_error("pd_test_arithmetic_series_round failed: ceil failed");
4954            }
4955
4956            // Round with decimals
4957            pandas::Series<double> b({1.234, 2.567, 3.891});
4958            auto r2 = b.round(2);
4959            passed = std::abs(r2[0] - 1.23) < 0.001 && std::abs(r2[1] - 2.57) < 0.001;
cend (pd_test_2_all.cpp:6185)
6175            std::cout << "========= const iteration ========================";
6176
6177            std::map<std::string, std::vector<int>> data = {
6178                {"Col1", {1}},
6179                {"Col2", {2}}
6180            };
6181            const pandas::DataFrame df(data);
6182
6183            // Test const iteration
6184            std::vector<std::string> cols;
6185            for (auto it = df.cbegin(); it != df.cend(); ++it) {
6186                cols.push_back(*it);
6187            }
6188
6189            bool passed = (cols.size() == 2);
6190            if (!passed) {
6191                std::cout << "  [FAIL] : in pd_test_iter_const_iteration() : expected 2 columns" << std::endl;
6192                throw std::runtime_error("pd_test_iter_const_iteration failed: size mismatch");
6193            }
6194
6195            std::cout << " -> tests passed" << std::endl;
classify_column_access (pd_test_2_all.cpp:20218)
20208// =========================================================================
20209// Classification tests
20210// =========================================================================
20211
20212void pd_test_getitem_dispatch_classify_numeric() {
20213    std::cout << "pd_test_getitem_dispatch_classify_numeric" << std::endl;
20214    pandas::DataFrame df;
20215    std::vector<numpy::float64> vals = {1.0, 2.0, 3.0};
20216    df.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals, "A"), true);
20217
20218    auto t = df.classify_column_access("A");
20219    check(t == pandas::DataFrame::ColumnAccessType::NumericColumn, "float64 -> NumericColumn");
20220
20221    // int64 column
20222    pandas::DataFrame df2;
20223    std::vector<numpy::int64> ivals = {10, 20, 30};
20224    auto iseries = std::make_unique<pandas::Series<numpy::int64>>(ivals, "B");
20225    iseries->set_dtype_override("int64");
20226    df2.insert(0, "B", std::move(iseries), true);
20227    auto t2 = df2.classify_column_access("B");
20228    check(t2 == pandas::DataFrame::ColumnAccessType::NumericColumn, "int64 -> NumericColumn");
classify_column_dtypes (pd_test_2_all.cpp:19207)
19197              << (dataframe_tests_agg_dispatch::g_fail == 0 ? "OK" : "FAIL")
19198              << "] pd_test_agg_dispatch test suite ========================== " << std::endl;
19199    return dataframe_tests_agg_dispatch::g_fail;
19200}
19201
19202}  // namespace dataframe_tests
19203// ------------------- pd_test_agg_dispatch.cpp (end) -----------------------------
19204
19205// ------------------- pd_test_agg_dtype.cpp (start) -----------------------------
19206// pd_test_agg_dtype.cpp - Tests for aggregation dtype resolution migration
19207// AggColumnInfo struct, classify_column_dtypes(), enhanced sum()
19208
19209#include <iostream>
19210#include <string>
19211#include <vector>
19212#include <cmath>
19213
19214#include "../pandas/pd_dataframe.h"
19215#include "../pandas/pd_series.h"
19216#include "../pandas/pd_index.h"
clip (pd_test_1_all.cpp:5099)
5089                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs failed");
5090            }
5091
5092            val = a["A"].get_value_str(2);
5093            passed = std::abs(std::stod(val) - 3.0) < 0.001;
5094            if (!passed) {
5095                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : abs for -3 failed" << std::endl;
5096                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs for -3 failed");
5097            }
5098
5099            auto c = df.clip(-2.0, 2.0);
5100            val = c["A"].get_value_str(2);
5101            passed = std::abs(std::stod(val) - (-2.0)) < 0.001;  // -3 clipped to -2
5102            if (!passed) {
5103                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : clip lower failed" << std::endl;
5104                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: clip lower failed");
5105            }
5106
5107            val = c["A"].get_value_str(3);
5108            passed = std::abs(std::stod(val) - 2.0) < 0.001;  // 4 clipped to 2
5109            if (!passed) {
clone (pd_test_1_all.cpp:5776)
5766    std::cout << " -> tests passed" << std::endl;
5767}
5768
5769void pd_test_categorical_index_clone() {  // Test: CategoricalIndex::clone() must return a non-null IndexBase whose size and name equal the original's; throws std::runtime_error otherwise.
5770    std::cout << "========= clone =======================================";
5771
5772    pandas::CategoricalArray arr({"p", "q", "r"});
5773    pandas::CategoricalIndex idx(arr, "original");
5774
5775    std::unique_ptr<pandas::IndexBase> cloned = idx.clone();  // the clone is held through the IndexBase interface, so only base-level properties are compared below
5776
5777    bool passed = (cloned != nullptr && cloned->size() == idx.size() &&  // the null check comes first; && short-circuits, so the dereferences only run for a non-null clone
5778                   cloned->name() == idx.name());
5779    if (!passed) {
5780        std::cout << "  [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5781        throw std::runtime_error("pd_test_categorical_index_clone failed");
5782    }
5783
5784    std::cout << " -> tests passed" << std::endl;
5785}
col (pd_test_1_all.cpp:6625)
6615            // Test replace
6616            {
6617                std::map<std::string, std::vector<numpy::float64>> float_data;
6618                float_data["X"] = {1.0, 2.0, 3.0};
6619                float_data["Y"] = {2.0, 2.0, 4.0};
6620                pandas::DataFrame df_repl(float_data);
6621
6622                auto replaced = df_repl.replace(2.0, 99.0);
6623                // Check some value was replaced (crude check via string)
6624                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6625                if (val_str.find("99") == std::string::npos) {
6626                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6627                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6628                }
6629            }
6630
6631            // Test drop_duplicates
6632            {
6633                std::map<std::string, std::vector<numpy::int64>> dup_data;
6634                dup_data["A"] = {1, 1, 2, 2};
col (pd_test_1_all.cpp:6625)
6615            // Test replace
6616            {
6617                std::map<std::string, std::vector<numpy::float64>> float_data;
6618                float_data["X"] = {1.0, 2.0, 3.0};
6619                float_data["Y"] = {2.0, 2.0, 4.0};
6620                pandas::DataFrame df_repl(float_data);
6621
6622                auto replaced = df_repl.replace(2.0, 99.0);
6623                // Check some value was replaced (crude check via string)
6624                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6625                if (val_str.find("99") == std::string::npos) {
6626                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6627                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6628                }
6629            }
6630
6631            // Test drop_duplicates
6632            {
6633                std::map<std::string, std::vector<numpy::int64>> dup_data;
6634                dup_data["A"] = {1, 1, 2, 2};
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283    constexpr size_t N_ROWS = 64;  // 4 * 2 * 4 * 2
5284    constexpr size_t N_COLS = 4;
5285
5286    std::vector<std::vector<int64_t>> col_data(N_COLS);
5287    for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289    for (size_t r = 0; r < N_ROWS; ++r) {
5290        // r decomposes as a*16 + b*8 + c*2 + d  (strides for 4·2·4·2)
5291        size_t c_lvl = (r / 2) % 4;            // 0..3 → C0..C3
5292        bool overridden = (c_lvl == 1 || c_lvl == 3);
5293        for (size_t c = 0; c < N_COLS; ++c) {
5294            int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295            col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296        }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283    constexpr size_t N_ROWS = 64;  // 4 * 2 * 4 * 2
5284    constexpr size_t N_COLS = 4;
5285
5286    std::vector<std::vector<int64_t>> col_data(N_COLS);
5287    for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289    for (size_t r = 0; r < N_ROWS; ++r) {
5290        // r decomposes as a*16 + b*8 + c*2 + d  (strides for 4·2·4·2)
5291        size_t c_lvl = (r / 2) % 4;            // 0..3 → C0..C3
5292        bool overridden = (c_lvl == 1 || c_lvl == 3);
5293        for (size_t c = 0; c < N_COLS; ++c) {
5294            int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295            col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296        }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283    constexpr size_t N_ROWS = 64;  // 4 * 2 * 4 * 2
5284    constexpr size_t N_COLS = 4;
5285
5286    std::vector<std::vector<int64_t>> col_data(N_COLS);
5287    for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289    for (size_t r = 0; r < N_ROWS; ++r) {
5290        // r decomposes as a*16 + b*8 + c*2 + d  (strides for 4·2·4·2)
5291        size_t c_lvl = (r / 2) % 4;            // 0..3 → C0..C3
5292        bool overridden = (c_lvl == 1 || c_lvl == 3);
5293        for (size_t c = 0; c < N_COLS; ++c) {
5294            int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295            col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296        }
col_data (pd_test_4_all.cpp:5286)
5276namespace {
5277
5278// Build a DataFrame matching the original fixture *after* the sliced setitem.
5279// override_fn(orig) computes the replacement value for every row whose
5280// C-level (third level of the row MultiIndex) is "C1" or "C3".
5281template <typename Fn>
5282pandas::DataFrame build_slicer_post_state(Fn override_fn) {
5283    constexpr size_t N_ROWS = 64;  // 4 * 2 * 4 * 2
5284    constexpr size_t N_COLS = 4;
5285
5286    std::vector<std::vector<int64_t>> col_data(N_COLS);
5287    for (auto& v : col_data) v.reserve(N_ROWS);
5288
5289    for (size_t r = 0; r < N_ROWS; ++r) {
5290        // r decomposes as a*16 + b*8 + c*2 + d  (strides for 4·2·4·2)
5291        size_t c_lvl = (r / 2) % 4;            // 0..3 → C0..C3
5292        bool overridden = (c_lvl == 1 || c_lvl == 3);
5293        for (size_t c = 0; c < N_COLS; ++c) {
5294            int64_t orig = static_cast<int64_t>(r * N_COLS + c);
5295            col_data[c].push_back(overridden ? override_fn(orig) : orig);
5296        }
column (pd_test_1_all.cpp:22039)
22029            std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030            std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031            std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033            if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034                passed = false;
22035                error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036                            ", A[2]=" + a2 + ", A[3]=" + a3;
22037            }
22038
22039            // Check B column (all should be original)
22040            double b0 = result.iat<double>(0, col_b_idx);
22041            if (b0 != 5.0) {
22042                passed = false;
22043                error_msg = "B[0] should be 5, got " + std::to_string(b0);
22044            }
22045
22046            if (!passed) {
22047                std::cout << "  [FAIL] : in pd_test_where_basic() : " << error_msg << std::endl;
22048                throw std::runtime_error("pd_test_where_basic failed: " + error_msg);
22049            }
column (pd_test_1_all.cpp:22039)
22029            std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030            std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031            std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033            if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034                passed = false;
22035                error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036                            ", A[2]=" + a2 + ", A[3]=" + a3;
22037            }
22038
22039            // Check B column (all should be original)
22040            double b0 = result.iat<double>(0, col_b_idx);
22041            if (b0 != 5.0) {
22042                passed = false;
22043                error_msg = "B[0] should be 5, got " + std::to_string(b0);
22044            }
22045
22046            if (!passed) {
22047                std::cout << "  [FAIL] : in pd_test_where_basic() : " << error_msg << std::endl;
22048                throw std::runtime_error("pd_test_where_basic failed: " + error_msg);
22049            }
column_at (pd_test_3_all.cpp:23814)
23804    std::map<std::string, std::vector<double>> data = {
23805        {"A", {1.0, 2.0, 3.0}}, {"B", {10.0, 20.0, 30.0}}
23806    };
23807    pandas::DataFrame df(data);
23808    pandas::DataFrame result = df.agg("sum", 1);
23809    // Row sums: 11, 22, 33
23810    if (result.nrows() != 3) {
23811        std::cout << "  [FAIL] : in pd_test_agg_axis1_sum() : wrong nrows" << std::endl;
23812        throw std::runtime_error("pd_test_agg_axis1_sum failed: wrong nrows");
23813    }
23814    if (std::abs(result.column_at(0).get_value_double(0) - 11.0) > 1e-10 ||
23815        std::abs(result.column_at(0).get_value_double(1) - 22.0) > 1e-10 ||
23816        std::abs(result.column_at(0).get_value_double(2) - 33.0) > 1e-10) {
23817        std::cout << "  [FAIL] : in pd_test_agg_axis1_sum() : wrong row sums" << std::endl;
23818        throw std::runtime_error("pd_test_agg_axis1_sum failed: wrong row sums");
23819    }
23820    std::cout << " -> tests passed" << std::endl;
23821}
23822
23823void pd_test_agg_axis1_mean() {
23824    std::cout << "========= DataFrame.agg('mean', axis=1) ===============";
column_dtype_override (pd_test_2_all.cpp:20590)
20580    std::vector<pandas::NamedAggSpec> specs;
20581    specs.push_back({"cnt", "val", "count"});
20582
20583    auto result = gb.agg_named(specs);
20584
20585    check(result.has_column("cnt"), "count_has_col");
20586    check(approx_eq(result["cnt"].get_value_double(0), 2.0), "count_a_eq_2");
20587    check(approx_eq(result["cnt"].get_value_double(1), 3.0), "count_b_eq_3");
20588    // count should always produce int64
20589    check(result.column_dtype_override("cnt") == "int64", "count_dtype_int64");
20590}
20591
20592// =====================================================================
20593// Test: agg_named() dtype propagation — int source + sum -> int64
20594// =====================================================================
20595
20596void pd_test_groupby_apply_named_agg_dtype_int_sum() {
20597    std::cout << "  -- pd_test_groupby_apply_named_agg_dtype_int_sum --" << std::endl;
20598
20599    pandas::DataFrame df;
column_to_series_f64 (pd_test_2_all.cpp:19385)
19375              << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377    return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
column_to_series_str (pd_test_2_all.cpp:19385)
19375              << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377    return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
column_to_typed_series (pd_test_3_all.cpp:28921)
28911    fail += dfa_check(rd.numeric_vals[1] == 30.0, "numeric_vals[1]==30.0");
28912    fail += dfa_check(rd.numeric_vals[2] == 100.0, "numeric_vals[2]==100.0");
28913    if (fail == 0) std::cout << "    OK" << std::endl;
28914    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_extract_row_typed_mixed failed");
28915}
28916
28917void pd_test_df_apply_assembly_column_to_typed_series_float() {  // Test: column_to_typed_series(0) on the numeric fixture is expected to give a 3-element Series<float64>; throws std::runtime_error if any check fails.
28918    std::cout << "  -- pd_test_df_apply_assembly_column_to_typed_series_float --" << std::endl;
28919    int fail = 0;  // sum of the values returned by dfa_check; the test passes only if this stays 0
28920    auto df = make_numeric_df();  // fixture helper defined outside this excerpt
28921    auto s = df.column_to_typed_series(0);
28922    fail += dfa_check(s != nullptr, "series not null");
28923    fail += dfa_check(s->size() == 3, "size == 3");  // NOTE(review): s is dereferenced here even if the null check above failed, unless dfa_check throws; confirm
28924    auto* fs = dynamic_cast<pandas::Series<numpy::float64>*>(s.get());  // checked downcast; yields nullptr when the object is not a Series<float64>
28925    fail += dfa_check(fs != nullptr, "dynamic_cast<Series<f64>>");
28926    if (fs) {  // values are inspected only when the cast succeeded, so a wrong type is reported instead of dereferenced
28927        fail += dfa_check((*fs)[0] == 1.0 && (*fs)[2] == 3.0, "values correct");
28928    }
28929    if (fail == 0) std::cout << "    OK" << std::endl;
28930    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_column_to_typed_series_float failed");
28931}
columns (pd_test_1_all.cpp:6220)
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
6224
6225            // Test dtypes
6226            auto dtypes = df.dtypes();
6227            if (dtypes.size() != 3) {
6228                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : dtypes size != 3" << std::endl;
6229                throw std::runtime_error("pd_test_dataframe_properties failed: dtypes size != 3");
6230            }
columns_index_dtype (pd_test_3_all.cpp:23051)
23041    df2.add_column<double>("B", {3.0, 4.0});
23042    pandas::construct_helpers::apply_column_index_name(df2, "");
23043    // name should remain unset or empty
23044    if (df2.columns().name().has_value() && !df2.columns().name()->empty())
23045        throw std::runtime_error("test_apply_column_metadata: empty name should not set columns_name");
23046
23047    // apply_int_column_dtype with true
23048    pandas::DataFrame df3;
23049    df3.add_column<double>("C", {5.0, 6.0});
23050    pandas::construct_helpers::apply_int_column_dtype(df3, true);
23051    if (df3.columns_index_dtype() != "int64")
23052        throw std::runtime_error("test_apply_column_metadata: columns_index_dtype should be 'int64'");
23053
23054    // apply_int_column_dtype with false -> no change
23055    pandas::DataFrame df4;
23056    df4.add_column<double>("D", {7.0, 8.0});
23057    pandas::construct_helpers::apply_int_column_dtype(df4, false);
23058    if (df4.columns_index_dtype() == "int64")
23059        throw std::runtime_error("test_apply_column_metadata: false flag should not set int64 dtype");
23060
23061    std::cout << "    -> test passed" << std::endl;
columns_level_names (pd_test_5_all.cpp:33080)
33070        // Flat: just dump columns_index_
33071        std::cout << tag << " flat_columns=[";
33072        for (size_t c = 0; c < df.ncols(); ++c) {
33073            if (c) std::cout << ", ";
33074            std::cout << "\"" << df.columns().get_value_str(c) << "\"";
33075        }
33076        std::cout << "]\n";
33077        return;
33078    }
33079    const auto& levels = df.columns_levels();
33080    const auto& lnames = df.columns_level_names();
33081    for (size_t lvl = 0; lvl < levels.size(); ++lvl) {
33082        std::cout << tag << " level[" << lvl << "]"
33083                  << " name=\""
33084                  << (lvl < lnames.size() ? lnames[lvl] : std::string("<?>"))
33085                  << "\" values=[";
33086        for (size_t c = 0; c < levels[lvl].size(); ++c) {
33087            if (c) std::cout << ", ";
33088            std::cout << "\"" << levels[lvl][c] << "\"";
33089        }
33090        std::cout << "]\n";
columns_levels (pd_test_3_all.cpp:23276)
23266    // Create a df with 3 columns
23267    std::map<std::string, std::vector<double>> data = {
23268        {"a\tx", {1.0}}, {"a\ty", {2.0}}, {"b\tx", {3.0}}
23269    };
23270    pandas::DataFrame df(data);
23271
23272    pandas::construct_helpers::apply_multiindex_columns(df, mi);
23273
23274    // Verify columns_levels
23275    if (!df.has_columns_levels()) throw std::runtime_error("expected columns_levels");
23276    const auto& levels = df.columns_levels();
23277    if (levels.size() != 2) throw std::runtime_error("expected 2 levels, got " + std::to_string(levels.size()));
23278    if (levels[0].size() != 3) throw std::runtime_error("expected 3 entries in level 0");
23279    if (levels[0][0] != "a" || levels[0][2] != "b") throw std::runtime_error("wrong level 0 values");
23280    if (levels[1][0] != "x" || levels[1][1] != "y") throw std::runtime_error("wrong level 1 values");
23281
23282    std::cout << "    -> test passed" << std::endl;
23283}
23284
23285void test_reconstruct_multiindex() {
23286    std::cout << "========= reconstruct_multiindex ======================";
columns_name (pd_test_3_all.cpp:10809)
10799    df.add_column<int64_t>("B", {4, 5, 6});
10800    auto s = df.shape_2d();
10801    if (s.first != 3 || s.second != 2) {
10802        std::cout << "  [FAIL] : in pd_test_3_all_shape_2d() : wrong dimensions" << std::endl;
10803        throw std::runtime_error("pd_test_3_all_shape_2d failed");
10804    }
10805    std::cout << " -> tests passed" << std::endl;
10806}
10807
10808void pd_test_3_all_columns_name_setter() {
10809    std::cout << "========= DataFrame.columns_name(string) setter ========";
10810    pandas::DataFrame df;
10811    df.add_column<int64_t>("A", {1, 2, 3});
10812    df.columns_name("MyColumns");
10813    pandas::DataFrame df2;
10814    df2.add_column<int64_t>("A", {1, 2, 3});
10815    df2.set_columns_name("MyColumns");
10816    if (df.to_string() != df2.to_string()) {
10817        std::cout << "  [FAIL] : in pd_test_3_all_columns_name_setter() : mismatch" << std::endl;
10818        throw std::runtime_error("pd_test_3_all_columns_name_setter failed");
10819    }
compute_string_column_stats (pd_test_2_all.cpp:19759)
19749              << "] pd_test_broadcasting test suite ==========================" << std::endl;
19750
19751    return dataframe_tests_broadcasting::g_fail;
19752}
19753
19754} // namespace dataframe_tests
19755// ------------------- pd_test_broadcasting.cpp (end) -----------------------------
19756
19757// ------------------- pd_test_describe.cpp (start) -----------------------------
19758// pd_test_describe.cpp - Tests for describe_full() migration
19759// StringColumnStats, compute_string_column_stats(), describe_full() modes
19760
19761#include <iostream>
19762#include <string>
19763#include <vector>
19764#include <cmath>
19765
19766#include "../pandas/pd_dataframe.h"
19767#include "../pandas/pd_series.h"
19768#include "../pandas/pd_index.h"
corr (pd_test_1_all.cpp:4655)
4645        }
4646
4647        void pd_test_aggregation_dataframe_corr() {
4648            std::cout << "========= DataFrame corr ========================";
4649
4650            std::map<std::string, std::vector<double>> data;
4651            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652            data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfect correlation
4653            pandas::DataFrame df(data);
4654
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
corrwith (pd_test_2_all.cpp:2013)
2003        std::cout << "====================================== [OK] pd_test_combine test suite ========================== " << std::endl;
2004        return 0;
2005    }
2006
2007} // namespace dataframe_tests
2008// ------------------- pd_test_combine.cpp (end) -----------------------------
2009
2010// ------------------- pd_test_corrwith.cpp (start) -----------------------------
2011// dataframe_tests/pd_test_corrwith.cpp
2012// Test suite for DataFrame::corrwith() method
2013
2014#include <iostream>
2015#include <stdexcept>
2016#include <cmath>
2017#include "../pandas/pd_dataframe.h"
2018
2019namespace dataframe_tests {
2020    namespace dataframe_tests_corrwith {
2021
2022        // Helper to check if two doubles are approximately equal
corrwith (pd_test_2_all.cpp:2013)
2003        std::cout << "====================================== [OK] pd_test_combine test suite ========================== " << std::endl;
2004        return 0;
2005    }
2006
2007} // namespace dataframe_tests
2008// ------------------- pd_test_combine.cpp (end) -----------------------------
2009
2010// ------------------- pd_test_corrwith.cpp (start) -----------------------------
2011// dataframe_tests/pd_test_corrwith.cpp
2012// Test suite for DataFrame::corrwith() method
2013
2014#include <iostream>
2015#include <stdexcept>
2016#include <cmath>
2017#include "../pandas/pd_dataframe.h"
2018
2019namespace dataframe_tests {
2020    namespace dataframe_tests_corrwith {
2021
2022        // Helper to check if two doubles are approximately equal
cov (pd_test_1_all.cpp:4690)
4680            std::cout << " -> tests passed" << std::endl;
4681        }
4682
4683        void pd_test_aggregation_dataframe_cov() {
4684            std::cout << "========= DataFrame cov =========================";
4685
4686            std::map<std::string, std::vector<double>> data;
4687            data["A"] = {1.0, 2.0, 3.0};
4688            pandas::DataFrame df(data);
4689
4690            auto cov_df = df.cov();
4691
4692            // Check dimensions
4693            bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694            if (!passed) {
4695                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696                throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697            }
4698
4699            // Var(A) = 1.0 with ddof=1
4700            std::string aa = cov_df["A"].get_value_str(0);
dt (pd_test_3_all.cpp:18239)
18229    if (offset.freqstr() != "D") {
18230        std::cout << "  [FAIL] : Day freqstr() failed" << std::endl;
18231        throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232    }
18233    if (offset.name() != "Day") {
18234        std::cout << "  [FAIL] : Day name() failed" << std::endl;
18235        throw std::runtime_error("pd_test_day_offset: name() failed");
18236    }
18237
18238    // Test apply
18239    numpy::datetime64 dt("2020-01-15");
18240    auto result = offset.apply(dt);
18241    std::tm tm = result.toTm();
18242    if (tm.tm_mday != 20) {
18243        std::cout << "  [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244        throw std::runtime_error("pd_test_day_offset: apply() failed");
18245    }
18246
18247    std::cout << " -> tests passed" << std::endl;
18248}
dtype_name (pd_test_1_all.cpp:10104)
10094}
10095
10096void pd_test_extension_index_array_constructor() {  // Test: constructing a CategoricalIndex from a CategoricalArray plus a name; throws std::runtime_error if any property check fails.
10097    std::cout << "========= array constructor =========================";
10098
10099    pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});  // four entries with "apple" repeated; size() is expected to be 4 below
10100    pandas::CategoricalIndex idx(arr, "fruits");
10101
10102    bool passed = (idx.size() == 4 && !idx.empty() &&
10103                   idx.name().has_value() && *idx.name() == "fruits" &&  // has_value() is tested first; && short-circuits, so *idx.name() only runs when a name is set
10104                   idx.dtype_name() == "category");  // dtype label the test expects for this index
10105    if (!passed) {
10106        std::cout << "  [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107        throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108    }
10109
10110    std::cout << " -> tests passed" << std::endl;
10111}
10112
10113void pd_test_extension_index_copy_constructor() {
10114    std::cout << "========= copy constructor =========================";
dtype_string (pd_test_5_all.cpp:93010)
93000    pandas_tests::check(threw_neg,
93001        "f_test_25_iat_resolved_out_of_range_throws_927551.neg_throws",
93002        local_fail);
93003}
93004
93005void case_27_propagate_dtype_nan_suppresses() {
93006    // has_nan_from_upcasting=true => no propagation (column dtype unchanged).
93007    std::cout << "-- case_27_propagate_dtype_nan_suppresses()\n";
93008    int local_fail = 0;
93009    auto df = make_1row_df();
93010    std::string col_before = df.dtype_string("x");
93011
93012    pandas::AlignmentResult ar;
93013    ar.kind = pandas::SetItemResult::PARTIAL_ALIGN;
93014    ar.has_nan_from_upcasting = true;
93015    ar.aligned_values = {std::numeric_limits<double>::quiet_NaN()};
93016
93017    pandas::propagate_aligned_dtype<std::int64_t>(df, std::string("x"), ar,
93018                                                  std::string("int64"));
93019    std::string col_after = df.dtype_string("x");
93020    pandas_tests::check(col_before == col_after,
dtypes (pd_test_1_all.cpp:6226)
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
6224
6225            // Test dtypes
6226            auto dtypes = df.dtypes();
6227            if (dtypes.size() != 3) {
6228                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : dtypes size != 3" << std::endl;
6229                throw std::runtime_error("pd_test_dataframe_properties failed: dtypes size != 3");
6230            }
6231
6232            std::cout << " -> tests passed" << std::endl;
6233        }
6234
6235        // =====================================================================
6236        // Test: Column Access
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
eval (pd_test_2_all.cpp:2722)
2712        std::cout << "====================================== [OK] pd_test_droplevel test suite ========================== " << std::endl;
2713        return 0;
2714    }
2715
2716} // namespace dataframe_tests
2717// ------------------- pd_test_droplevel.cpp (end) -----------------------------
2718
2719// ------------------- pd_test_eval.cpp (start) -----------------------------
2720// dataframe_tests/pd_test_eval.cpp
2721// Test for DataFrame.eval() method
2722
2723#include <iostream>
2724#include <stdexcept>
2725#include <cmath>
2726#include <vector>
2727#include <map>
2728#include <string>
2729#include <limits>
2730#include <variant>
2731#include "../pandas/pd_dataframe.h"
extract_column_as_numeric_series (pd_test_2_all.cpp:20328)
20318// =========================================================================
20319
20320void pd_test_getitem_dispatch_extract_numeric() {
20321    std::cout << "pd_test_getitem_dispatch_extract_numeric" << std::endl;
20322    pandas::DataFrame df;
20323    std::vector<numpy::float64> vals = {1.5, 2.5, 3.5};
20324    df.insert(0, "val", std::make_unique<pandas::Series<numpy::float64>>(vals, "val"), true);
20325    df.set_index(std::make_unique<pandas::Index<std::string>>(
20326        std::vector<std::string>{"a", "b", "c"}));
20327
20328    auto s = df.extract_column_as_numeric_series("val");
20329    check(s.size() == 3, "numeric size");
20330    check(std::abs(s.values().getElementAt({0}) - 1.5) < 1e-10, "numeric val[0]");
20331    check(std::abs(s.values().getElementAt({2}) - 3.5) < 1e-10, "numeric val[2]");
20332    check(s.name() == "val", "numeric name");
20333
20334    // int32 dtype override preserved
20335    pandas::DataFrame df2;
20336    std::vector<numpy::float64> ivals = {10.0, 20.0};
20337    auto is = std::make_unique<pandas::Series<numpy::float64>>(ivals, "ival");
20338    is->set_dtype_override("int32");
extract_row_typed (pd_test_3_all.cpp:28887)
28877    df.add_column("Name", std::vector<std::string>{"Alice", "Bob", "Carol"});
28878    df.add_column("Age", std::vector<numpy::float64>{30.0, 25.0, 40.0});
28879    df.add_column("Salary", std::vector<numpy::float64>{100.0, 200.0, 300.0});
28880    return df;
28881}
28882
28883void pd_test_df_apply_assembly_extract_row_typed_numeric() {
28884    std::cout << "  -- pd_test_df_apply_assembly_extract_row_typed_numeric --" << std::endl;
28885    int fail = 0;
28886    auto df = make_numeric_df();
28887    auto rd = df.extract_row_typed(1);
28888    fail += dfa_check(rd.size() == 3, "row size == 3");
28889    fail += dfa_check(!rd.has_string_cols, "all numeric => !has_string_cols");
28890    fail += dfa_check(rd.col_names[0] == "A" && rd.col_names[2] == "C", "col_names");
28891    fail += dfa_check(!rd.is_string_col[0] && !rd.is_string_col[1] && !rd.is_string_col[2],
28892                      "is_string_col all false");
28893    fail += dfa_check(rd.numeric_vals[0] == 2.0, "numeric_vals[0]==2.0");
28894    fail += dfa_check(rd.numeric_vals[1] == 5.0, "numeric_vals[1]==5.0");
28895    fail += dfa_check(rd.numeric_vals[2] == 8.0, "numeric_vals[2]==8.0");
28896    if (fail == 0) std::cout << "    OK" << std::endl;
28897    if (fail != 0) throw std::runtime_error("pd_test_df_apply_assembly_extract_row_typed_numeric failed");
file (pd_test_2_all.cpp:3463)
3453                {"C", {100, 200, 300, 400, 500}}
3454            };
3455
3456            pandas::DataFrame df(data);
3457
3458            // Export to Excel
3459            std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460            df.to_excel(filepath);
3461
3462            // Verify file was created
3463            std::ifstream file(filepath, std::ios::binary);
3464            if (!file.good()) {
3465                std::cout << "  [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466                throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467            }
3468
3469            // Check file size is reasonable (valid XLSX should be > 1KB)
3470            file.seekg(0, std::ios::end);
3471            auto size = file.tellg();
3472            if (size < 1000) {
3473                std::cout << "  [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
filter (pd_test_3_all.cpp:2805)
2795        threw = true;
2796    }
2797    if (!threw) {
2798        throw std::runtime_error("bool_() should throw for multi-element DataFrame");
2799    }
2800
2801    std::cout << " -> tests passed" << std::endl;
2802}
2803
2804void pd_test_3_all_df_filter() {
2805    std::cout << "========= DataFrame.filter() =============================";
2806
2807    std::map<std::string, std::vector<double>> data = {
2808        {"col_a", {1.0, 2.0, 3.0}},
2809        {"col_b", {4.0, 5.0, 6.0}},
2810        {"other", {7.0, 8.0, 9.0}}
2811    };
2812    pandas::DataFrame df(data);
2813
2814    // Test filter by items
2815    pandas::DataFrame filtered_items = df.filter({"col_a", "col_b"});
flags (pd_test_1_all.cpp:16397)
16387        // =====================================================================
16388        // Series Flags Integration Tests
16389        // =====================================================================
16390
16391        void pd_test_ndframe_series_flags() {
16392            std::cout << "========= series flags integration =============================" << std::endl;
16393
16394            pandas::Series<int> s({1, 2, 3});
16395
16396            // Test default flags
16397            bool passed = s.flags().allows_duplicate_labels == true;
16398            if (!passed) {
16399                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default allows_duplicate_labels" << std::endl;
16400                throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401            }
16402
16403            passed = s.flags().copy_on_write == false;
16404            if (!passed) {
16405                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406                throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407            }
floor (pd_test_1_all.cpp:4942)
4932            pandas::Series<double> a({1.4, 2.5, 3.6, -1.4, -2.5});
4933
4934            auto r = a.round();
4935            bool passed = std::abs(r[0] - 1.0) < 0.001 && std::abs(r[2] - 4.0) < 0.001;
4936            if (!passed) {
4937                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : round failed" << std::endl;
4938                throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4939            }
4940
4941            auto f = a.floor();
4942            passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4943            if (!passed) {
4944                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4945                throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4946            }
4947
4948            auto c = a.ceil();
4949            passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4950            if (!passed) {
4951                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
format_percentile_labels (pd_test_2_all.cpp:19921)
19911}
19912
19913// =====================================================================
19914// Test: percentile label formatting
19915// =====================================================================
19916
19917void pd_test_describe_percentile_labels() {
19918    std::cout << "  -- pd_test_describe_percentile_labels --" << std::endl;
19919
19920    // Standard percentiles
19921    auto labels = pandas::DataFrame::format_percentile_labels({0.25, 0.5, 0.75});
19922    check(labels.size() == 3, "pct_labels_count_3");
19923    check(labels[0] == "25%", "pct_label_25");
19924    check(labels[1] == "50%", "pct_label_50");
19925    check(labels[2] == "75%", "pct_label_75");
19926
19927    // Non-integer percentile
19928    auto labels2 = pandas::DataFrame::format_percentile_labels({0.333});
19929    check(labels2[0] == "33.3%", "pct_label_33_3");
19930}
frame_dtype_override (pd_test_5_all.cpp:146164)
146154    };
146155}
146156
146157void case_100_apply_int_all_int_axis0(int& lf) {
146158    std::cout << "-- case_100_apply_int_all_int_axis0\n";
146159    auto df = mk_df_one_int64("a", {1.0, 2.0, 3.0});
146160    auto h = mk_hist_all_int();
146161    auto out = df.apply_resolved_typed(cb_int_sum(), h, 0);
146162    pandas_tests::check(out.column_dtype_override("a") == "int64",
146163        "C_26k_case_100_apply_int_all_int_axis0()_a", lf);
146164    pandas_tests::check(out.frame_dtype_override() == "int64",
146165        "C_26k_case_100_apply_int_all_int_axis0()_frame", lf);
146166}
146167
146168void case_101_apply_float_all_int_axis0(int& lf) {
146169    std::cout << "-- case_101_apply_float_all_int_axis0\n";
146170    auto df = mk_df_one_float64("a", {1.5, 2.5, 3.5});
146171    auto h = mk_hist_all_int();
146172    auto out = df.apply_resolved_typed(cb_int_sum(), h, 0);
146173    // plan_02: all-int returns infer int64 even on a float64 source
146174    // (pandas 2.2.2: df(float64).apply(int(sum),axis=0).dtype == int64).
has_column (pd_test_1_all.cpp:6256)
6246            pandas::DataFrame df(data);
6247
6248            // Test single column access
6249            const pandas::NDFrameBase& col_a = df["A"];
6250            if (col_a.size() != 3) {
6251                std::cout << "  [FAIL] : in pd_test_dataframe_column_access() : column A size != 3" << std::endl;
6252                throw std::runtime_error("pd_test_dataframe_column_access failed: column A size != 3");
6253            }
6254
6255            // Test has_column
6256            if (!df.has_column("A")) {
6257                std::cout << "  [FAIL] : in pd_test_dataframe_column_access() : has_column A should be true" << std::endl;
6258                throw std::runtime_error("pd_test_dataframe_column_access failed: has_column A should be true");
6259            }
6260            if (df.has_column("Z")) {
6261                std::cout << "  [FAIL] : in pd_test_dataframe_column_access() : has_column Z should be false" << std::endl;
6262                throw std::runtime_error("pd_test_dataframe_column_access failed: has_column Z should be false");
6263            }
6264
6265            // Test multiple column access
6266            pandas::DataFrame subset = df[std::vector<std::string>{"A", "C"}];
has_columns_levels (pd_test_2_all.cpp:21382)
21372    auto cols = result.columns().to_list();
21373    bool has_sum = false, has_mean = false;
21374    for (const auto& c : cols) {
21375        if (c.find("sum") != std::string::npos) has_sum = true;
21376        if (c.find("mean") != std::string::npos) has_mean = true;
21377    }
21378    check(has_sum, "has_sum_cols");
21379    check(has_mean, "has_mean_cols");
21380
21381    // Check that it has multi-level columns
21382    check(result.has_columns_levels(), "has_columns_levels");
21383
21384    // Verify values: check that expected sum values exist somewhere in the result
21385    // Column names pattern: (sum, A), (sum, B), (mean, A), (mean, B)
21386    // or (sum, (sales, A)), etc. depending on single/multi value columns
21387    auto all_cols = result.columns().to_list();
21388    bool found_sum_col = false;
21389    for (const auto& c : all_cols) {
21390        if (c.find("sum") != std::string::npos) {
21391            const auto& col_ref = result[c];
21392            for (size_t r = 0; r < result.nrows(); ++r) {
has_multiindex (pd_test_1_all.cpp:27019)
27009            std::map<std::string, std::vector<std::string>> data = {
27010                {"A", {"a", "a", "b", "b"}},
27011                {"B", {"x", "x", "y", "y"}}
27012            };
27013            pandas::DataFrame df(data);
27014
27015            auto result = df.value_counts();
27016            auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27017
27018            if (!counts.has_multiindex()) {
27019                std::cout << "  [FAIL] : expected MultiIndex" << std::endl;
27020                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27021            }
27022
27023            const auto& midx = counts.multiindex();
27024
27025            // Should have 2 levels
27026            if (midx.nlevels() != 2) {
27027                std::cout << "  [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27028                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
hasnans (pd_test_1_all.cpp:5363)
5353void pd_test_categorical_index_from_codes() {
5354    std::cout << "========= from_codes =================================";
5355
5356    std::vector<numpy::int32> codes = {0, 1, 0, 2, -1};  // -1 = NA
5357    std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359    pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361    bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362                   idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363                   idx.hasnans());  // has NA from code -1
5364    if (!passed) {
5365        std::cout << "  [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366        throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367    }
5368
5369    std::cout << " -> tests passed" << std::endl;
5370}
5371
5372void pd_test_categorical_index_simple_new() {
5373    std::cout << "========= _simple_new =================================";
index (pd_test_1_all.cpp:6680)
6670        void pd_test_dataframe_index_ops() {
6671            std::cout << "========= index operations =================";
6672
6673            // Test set_axis (rows)
6674            {
6675                std::map<std::string, std::vector<int>> data;
6676                data["A"] = {1, 2, 3};
6677                pandas::DataFrame df(data);
6678
6679                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680                std::string idx0 = renamed.index().get_value_str(0);
6681                if (idx0 != "x") {
6682                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684                }
6685            }
6686
6687            // Test set_axis (columns)
6688            {
6689                std::map<std::string, std::vector<int>> data;
6690                data["A"] = {1, 2};
index_name (pd_test_3_all.cpp:1290)
1280        }
1281
1282        pandas::DataFrame df3;
1283        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285        if (df3.index().get_value_str(4) != "b") {
1286            throw std::runtime_error("set_index_from_strings: values wrong");
1287        }
1288    }
1289
1290    // E. set_index_name + index_name() getter/setter
1291    {
1292        pandas::DataFrame df;
1293        df.add_column<int64_t>("val", {1, 2, 3});
1294        df.set_index_from_list({"r0", "r1", "r2"});
1295
1296        df.set_index_name("City");
1297        if (!df.index_name().has_value() || df.index_name().value() != "City") {
1298            throw std::runtime_error("set_index_name: round-trip failed");
1299        }
index_name (pd_test_3_all.cpp:1290)
1280        }
1281
1282        pandas::DataFrame df3;
1283        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285        if (df3.index().get_value_str(4) != "b") {
1286            throw std::runtime_error("set_index_from_strings: values wrong");
1287        }
1288    }
1289
1290    // E. set_index_name + index_name() getter/setter
1291    {
1292        pandas::DataFrame df;
1293        df.add_column<int64_t>("val", {1, 2, 3});
1294        df.set_index_from_list({"r0", "r1", "r2"});
1295
1296        df.set_index_name("City");
1297        if (!df.index_name().has_value() || df.index_name().value() != "City") {
1298            throw std::runtime_error("set_index_name: round-trip failed");
1299        }
isetitem (pd_test_3_all.cpp:2882)
2872    // Verify first element
2873    if (std::abs(arr.getElementAt({0, 0}) - 1.0) > 0.001) {
2874        throw std::runtime_error("to_numpy value failed");
2875    }
2876
2877    std::cout << " -> tests passed" << std::endl;
2878}
2879
2880void pd_test_3_all_df_isetitem() {
2881    std::cout << "========= DataFrame.isetitem() ===========================";
2882
2883    std::map<std::string, std::vector<double>> data = {
2884        {"A", {1.0, 2.0, 3.0}},
2885        {"B", {4.0, 5.0, 6.0}}
2886    };
2887    pandas::DataFrame df(data);
2888
2889    // Set first column to new values
2890    df.isetitem(static_cast<size_t>(0), std::vector<double>{10.0, 20.0, 30.0});
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
iss (pd_test_2_all.cpp:10348)
10338            try {
10339                df.to_clipboard(true, '\t', false);  // index=false
10340            } catch (const std::exception& e) {
10341                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : exception: " << e.what() << std::endl;
10342                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10343            }
10344
10345#ifdef _WIN32
10346            std::string clipboard = get_clipboard_text_with_retry();
10347            // Without index, first line should be just "A"
10348            std::istringstream iss(clipboard);
10349            std::string first_line;
10350            std::getline(iss, first_line);
10351            passed = first_line == "A";
10352            if (!passed) {
10353                std::cout << "  [FAIL] : in pd_test_to_clipboard_no_index() : first line was '" << first_line << "', expected 'A'" << std::endl;
10354                throw std::runtime_error("pd_test_to_clipboard_no_index failed");
10355            }
10356#endif
10357
10358            std::cout << " -> tests passed" << std::endl;
match_multiindex_columns (pd_test_3_all.cpp:29247)
29237    pandas::DataFrame df;
29238    df.add_column<numpy::float64>("c0", {1.0, 2.0});
29239    df.add_column<numpy::float64>("c1", {3.0, 4.0});
29240    df.add_column<numpy::float64>("c2", {5.0, 6.0});
29241    std::vector<std::vector<std::string>> levels = {
29242        {"bar", "bar", "foo"},
29243        {"one", "two", "one"}
29244    };
29245    df.set_columns_levels(levels, {"L0", "L1"});
29246
29247    auto exact = df.match_multiindex_columns({"bar", "one"});
29248    fail += sm_check(exact.exact_match, "exact match flag set");
29249    fail += sm_check(exact.matching_cols.size() == 1 && exact.matching_cols[0] == 0, "exact match col index");
29250
29251    auto partial = df.match_multiindex_columns({"bar"});
29252    fail += sm_check(!partial.exact_match, "partial match not exact");
29253    fail += sm_check(partial.matching_cols.size() == 2, "partial match 2 cols");
29254    fail += sm_check(partial.matching_cols[0] == 0 && partial.matching_cols[1] == 1, "partial col indices");
29255
29256    auto none = df.match_multiindex_columns({"baz"});
29257    fail += sm_check(none.matching_cols.empty(), "no match -> empty");
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ncols (pd_test_1_all.cpp:4658)
4648            std::cout << "========= DataFrame corr ========================";
4649
4650            std::map<std::string, std::vector<double>> data;
4651            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652            data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfect correlation
4653            pandas::DataFrame df(data);
4654
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
nrows (pd_test_1_all.cpp:4658)
4648            std::cout << "========= DataFrame corr ========================";
4649
4650            std::map<std::string, std::vector<double>> data;
4651            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652            data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfect correlation
4653            pandas::DataFrame df(data);
4654
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
resolve_column_level (pd_test_3_all.cpp:28773)
28763    fail += spt_check(r.has_multiindex(), "result has multiindex");
28764    fail += spt_check(r.multiindex().nlevels() == 2, "multiindex nlevels==2 (row + stacked)");
28765    if (fail == 0) std::cout << "    OK" << std::endl;
28766    if (fail != 0) throw std::runtime_error("pd_test_stack_multiindex_result failed");
28767}
28768
28769void pd_test_resolve_column_level_int() {
28770    std::cout << "  -- pd_test_resolve_column_level_int --" << std::endl;
28771    int fail = 0;
28772    auto df = make_ml_df();
28773    fail += spt_check(df.resolve_column_level(0) == 0, "0 -> 0");
28774    fail += spt_check(df.resolve_column_level(1) == 1, "1 -> 1");
28775    fail += spt_check(df.resolve_column_level(-1) == 1, "-1 -> 1");
28776    fail += spt_check(df.resolve_column_level(-2) == 0, "-2 -> 0");
28777    fail += spt_check(df.resolve_column_level(5) == -1, "5 -> -1 (out of range)");
28778    if (fail == 0) std::cout << "    OK" << std::endl;
28779    if (fail != 0) throw std::runtime_error("pd_test_resolve_column_level_int failed");
28780}
28781
28782void pd_test_resolve_column_level_string() {
28783    std::cout << "  -- pd_test_resolve_column_level_string --" << std::endl;
resolve_column_level (pd_test_3_all.cpp:28773)
28763    fail += spt_check(r.has_multiindex(), "result has multiindex");
28764    fail += spt_check(r.multiindex().nlevels() == 2, "multiindex nlevels==2 (row + stacked)");
28765    if (fail == 0) std::cout << "    OK" << std::endl;
28766    if (fail != 0) throw std::runtime_error("pd_test_stack_multiindex_result failed");
28767}
28768
28769void pd_test_resolve_column_level_int() {
28770    std::cout << "  -- pd_test_resolve_column_level_int --" << std::endl;
28771    int fail = 0;
28772    auto df = make_ml_df();
28773    fail += spt_check(df.resolve_column_level(0) == 0, "0 -> 0");
28774    fail += spt_check(df.resolve_column_level(1) == 1, "1 -> 1");
28775    fail += spt_check(df.resolve_column_level(-1) == 1, "-1 -> 1");
28776    fail += spt_check(df.resolve_column_level(-2) == 0, "-2 -> 0");
28777    fail += spt_check(df.resolve_column_level(5) == -1, "5 -> -1 (out of range)");
28778    if (fail == 0) std::cout << "    OK" << std::endl;
28779    if (fail != 0) throw std::runtime_error("pd_test_resolve_column_level_int failed");
28780}
28781
28782void pd_test_resolve_column_level_string() {
28783    std::cout << "  -- pd_test_resolve_column_level_string --" << std::endl;
resolve_sort_columns_multiindex (pd_test_2_all.cpp:22232)
22222    }
22223    std::cout << "====================================== [OK] pd_test_series_dtype_inference test suite ========================== " << std::endl;
22224    return 0;
22225}
22226
22227} // namespace dataframe_tests
22228// ------------------- pd_test_series_dtype_inference.cpp (end) -----------------------------
22229
22230// ------------------- pd_test_sort_key.cpp (start) -----------------------------
22231// pd_test_sort_key.cpp - Tests for sort_values key function support
22232// Tests sort_values_by_transformed() and resolve_sort_columns_multiindex()
22233
22234#include <iostream>
22235#include <string>
22236#include <vector>
22237#include <cmath>
22238#include <numeric>
22239
22240#include "../pandas/pd_dataframe.h"
22241
22242namespace dataframe_tests {
round (pd_test_1_all.cpp:1688)
1679    void pd_test_floating_array_rounding() {
1680        std::cout << "========= FloatingArray: rounding ======================= ";
1681
1682        pandas::FloatingArray<double> arr({
1683            std::optional<double>(1.234),
1684            std::optional<double>(2.567),
1685            std::nullopt
1686        });
1687
1688        auto rounded = arr.round(2);
1689        if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1690            std::abs(rounded[1].value() - 2.57) > 0.001) {
1691            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1692            throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1693        }
1694
1695        if (!rounded.is_na(2)) {
1696            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1697            throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1698        }
row_values_f64 (pd_test_2_all.cpp:19385)
19375              << dataframe_tests_agg_dtype::g_fail << " failed)" << std::endl;
19376
19377    return dataframe_tests_agg_dtype::g_fail;
19378}
19379
19380} // namespace dataframe_tests
19381// ------------------- pd_test_agg_dtype.cpp (end) -----------------------------
19382
19383// ------------------- pd_test_apply_extract.cpp (start) -----------------------------
19384// pd_test_apply_extract.cpp - Tests for DataFrame column/row extraction helpers
19385// column_to_series_f64(), column_to_series_str(), row_values_f64()
19386
19387#include <iostream>
19388#include <string>
19389#include <vector>
19390#include <cmath>
19391
19392#include "../pandas/pd_dataframe.h"
19393#include "../pandas/pd_series.h"
19394#include "../pandas/pd_index.h"
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
select_dtypes (pd_test_1_all.cpp:7044)
7034            std::cout << "========= select_dtypes =============================";
7035
7036            // Create DataFrame with mixed types using add_column
7037            pandas::DataFrame df;
7038            df.add_column<numpy::int64>("int_col", {1, 2, 3});
7039            df.add_column<numpy::float64>("float_col", {1.1, 2.2, 3.3});
7040            df.add_column<std::string>("str_col", {"a", "b", "c"});
7041            df.add_column<numpy::bool_>("bool_col", {true, false, true});
7042
7043            // Test include int
7044            pandas::DataFrame int_result = df.select_dtypes({"int"});
7045            if (int_result.ncols() != 1 || !int_result.has_column("int_col")) {
7046                std::cout << "  [FAIL] : select_dtypes include int" << std::endl;
7047                throw std::runtime_error("pd_test_dataframe_select_dtypes failed: include int");
7048            }
7049
7050            // Test include number (numeric types)
7051            pandas::DataFrame num_result = df.select_dtypes({"number"});
7052            if (num_result.ncols() != 2 || !num_result.has_column("int_col") || !num_result.has_column("float_col")) {
7053                std::cout << "  [FAIL] : select_dtypes include number" << std::endl;
7054                throw std::runtime_error("pd_test_dataframe_select_dtypes failed: include number");
select_multiindex_columns (pd_test_3_all.cpp:29220)
29210    pandas::DataFrame df;
29211    df.add_column<numpy::float64>("c0", {1.0, 2.0});
29212    df.add_column<numpy::float64>("c1", {3.0, 4.0});
29213    df.add_column<numpy::float64>("c2", {5.0, 6.0});
29214    df.add_column<numpy::float64>("c3", {7.0, 8.0});
29215    std::vector<std::vector<std::string>> levels = {
29216        {"bar", "bar", "foo", "baz"},
29217        {"one", "two", "one", "two"}
29218    };
29219    df.set_columns_levels(levels, {"L0", "L1"});
29220    auto sub = df.select_multiindex_columns({"bar", "foo"});
29221    fail += sm_check(sub.ncols() == 3, "3 cols selected (bar,bar,foo)");
29222    fail += sm_check(sub.has_columns_levels(), "MI preserved");
29223    if (sub.has_columns_levels()) {
29224        const auto& lv = sub.columns_levels();
29225        fail += sm_check(lv.size() == 2, "2 levels preserved");
29226        fail += sm_check(lv[0].size() == 3 && lv[0][0] == "bar" && lv[0][2] == "foo", "level 0 values");
29227    }
29228    auto empty = df.select_multiindex_columns({"nonexistent"});
29229    fail += sm_check(empty.ncols() == 0, "no match -> empty");
29230    if (fail == 0) std::cout << "    OK" << std::endl;
set_column (pd_test_3_all.cpp:11118)
11108// ============================================================================
11109
11110void pd_test_3_all_set_column_aligned() {
11111    std::cout << "========= Fix 1: set_column aligned by index =============";
11112    pandas::DataFrame df;
11113    df.add_column<int64_t>("values", {1, 2, 3});
11114    df.set_index(std::make_unique<pandas::Index<std::string>>(
11115        std::vector<std::string>{"x", "y", "z"}));
11116
11117    pandas::Series<int> s1({10, 20, 30}, {"z", "x", "y"});
11118    df.set_column("aligned", s1);
11119
11120    // After alignment: x->20, y->30, z->10
11121    if (df.ncols() != 2) {
11122        std::cout << "  [FAIL] : in pd_test_3_all_set_column_aligned() : expected 2 cols" << std::endl;
11123        throw std::runtime_error("set_column aligned failed");
11124    }
11125    std::cout << " -> tests passed" << std::endl;
11126}
11127
11128void pd_test_3_all_set_column_partial() {
set_column_cat_categories (pd_test_5_all.cpp:29010)
29000}
29001
29002static const std::vector<std::string>& DAYS() {
29003    static const std::vector<std::string> d = {
29004        "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
29005    return d;
29006}
29007
29008static pandas::DataFrame make_579_fixture_cat(bool ordered = true) {
29009    pandas::DataFrame df = make_579_fixture();
29010    df.set_column_cat_categories("day", DAYS());
29011    df.set_column_cat_ordered("day", ordered);
29012    return df;
29013}
29014
29015static std::vector<std::string> result_index_strs(const pandas::DataFrame& r) {
29016    std::vector<std::string> out;
29017    const auto& idx = r.index();
29018    size_t n = idx.size();
29019    out.reserve(n);
29020    for (size_t i = 0; i < n; ++i) out.push_back(idx.get_value_str(i));
set_column_cat_ordered (pd_test_5_all.cpp:29011)
29002static const std::vector<std::string>& DAYS() {
29003    static const std::vector<std::string> d = {
29004        "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};
29005    return d;
29006}
29007
29008static pandas::DataFrame make_579_fixture_cat(bool ordered = true) {
29009    pandas::DataFrame df = make_579_fixture();
29010    df.set_column_cat_categories("day", DAYS());
29011    df.set_column_cat_ordered("day", ordered);
29012    return df;
29013}
29014
29015static std::vector<std::string> result_index_strs(const pandas::DataFrame& r) {
29016    std::vector<std::string> out;
29017    const auto& idx = r.index();
29018    size_t n = idx.size();
29019    out.reserve(n);
29020    for (size_t i = 0; i < n; ++i) out.push_back(idx.get_value_str(i));
29021    return out;
set_column_datetime_ea (pd_test_extension_array.cpp:313)
303        numpy::datetime64(1609459200000000000LL, numpy::DateTimeUnit::Nanosecond),
304    };
305    std::map<std::string, std::vector<numpy::datetime64>> data;
306    data["d"] = dt_vec;
307    pandas::DataFrame df(data);
308    // Use the new C.1 helper to attach the EA.
309    std::vector<std::optional<pandas::Timestamp>> ts = {
310        pandas::Timestamp(1577836800000000000LL),
311        pandas::Timestamp(1609459200000000000LL),
312    };
313    df.set_column_datetime_ea("d", ts, /*uniform_tz=*/"");
314    auto& col = df["d"];
315    auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
316    check(col_dt != nullptr, "set_column_datetime_ea: col is Series<datetime64>");
317    if (col_dt) {
318        const auto& da_opt = col_dt->datetime_array();
319        check(da_opt.has_value() && *da_opt,
320              "set_column_datetime_ea: column EA populated");
321        if (da_opt.has_value() && *da_opt) {
322            check((*da_opt)->size() == 2,
323                  "set_column_datetime_ea: EA size matches");
set_column_dtype (pd_test_4_all.cpp:2203)
2193// ============================================================================
2194void na_iloc_slice_with_na() {
2195    pandas::DataFrame df;
2196
2197    constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
2198
2199    // int_na : Int64 nullable — row 1 NA, row 2 = 3.
2200    //          Stored as double so NaN can act as the NA sentinel; the
2201    //          dtype override drives the "<NA>" / integer rendering.
2202    df.add_column<double>("int_na", {NaN, 3.0});
2203    df.set_column_dtype("int_na", "Int64");
2204
2205    // float_na : plain float64 with NaN — no override needed.
2206    df.add_column<double>("float_na", {NaN, 3.0});
2207
2208    // bool_na : nullable boolean — row 1 NA, row 2 = False.
2209    //           Stored as double with NaN sentinel; dtype override drives
2210    //           the "<NA>" / "True" / "False" rendering.
2211    df.add_column<double>("bool_na", {NaN, 0.0});
2212    df.set_column_dtype("bool_na", "boolean");
set_column_dtype (pd_test_4_all.cpp:2203)
2193// ============================================================================
2194void na_iloc_slice_with_na() {
2195    pandas::DataFrame df;
2196
2197    constexpr double NaN = std::numeric_limits<double>::quiet_NaN();
2198
2199    // int_na : Int64 nullable — row 1 NA, row 2 = 3.
2200    //          Stored as double so NaN can act as the NA sentinel; the
2201    //          dtype override drives the "<NA>" / integer rendering.
2202    df.add_column<double>("int_na", {NaN, 3.0});
2203    df.set_column_dtype("int_na", "Int64");
2204
2205    // float_na : plain float64 with NaN — no override needed.
2206    df.add_column<double>("float_na", {NaN, 3.0});
2207
2208    // bool_na : nullable boolean — row 1 NA, row 2 = False.
2209    //           Stored as double with NaN sentinel; dtype override drives
2210    //           the "<NA>" / "True" / "False" rendering.
2211    df.add_column<double>("bool_na", {NaN, 0.0});
2212    df.set_column_dtype("bool_na", "boolean");
set_column_names (pd_test_3_all.cpp:1708)
1698    pandas::DataFrame f_df;
1699    f_df.add_column<std::int64_t>("a", {1, 2});
1700    f_df.add_column<std::int64_t>("b", {3, 4});
1701    std::map<std::string, std::string> f_mapper{{"a", "A"}, {"b", "B"}};
1702    auto f = f_df.rename_columns(f_mapper);
1703    if (f.columns().get_value_str(0) != "A") {
1704        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case F col[0]" << std::endl;
1705        throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case F col[0]");
1706    }
1707
1708    // --- Case G: set_column_names(vector) returns reference (chainable) ---
1709    pandas::DataFrame g_df;
1710    g_df.add_column<std::int64_t>("c1", {1});
1711    g_df.add_column<std::int64_t>("c2", {2});
1712    auto& g_ref = g_df.set_column_names(std::vector<std::string>{"x", "y"});
1713    if (&g_ref != &g_df) {
1714        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case G not self-ref" << std::endl;
1715        throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case G not self-ref");
1716    }
1717    if (g_df.columns().get_value_str(0) != "x") {
1718        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case G col[0]" << std::endl;
set_column_resolved (pd_test_5_all.cpp:94241)
94231    pandas::DtypeOverride ov = pandas::DtypeOverride::parse("Float64");
94232    auto df = pandas::from_records_resolved<pandas::DataFrame>(rows, cols, ov);
94233    pandas_tests::check(df.column_dtype_override("n") == "Float64",
94234        "C_26h_case_144_records_override_applies()_dtype", local_fail);
94235}
94236
94237void case_150_create_new_column(int& local_fail) {
94238    std::cout << "-- case_150_create_new_column\n";
94239    pandas::DataFrame df;
94240    KVec col = { kv_int(1), kv_int(2) };
94241    df.set_column_resolved("n", col);
94242    pandas_tests::check(df.has_column("n"),
94243        "C_26h_case_150_create_new_column()_has", local_fail);
94244    pandas_tests::check(df.column_dtype_override("n") == "int64",
94245        "C_26h_case_150_create_new_column()_dtype", local_fail);
94246}
94247
94248void case_151_replace_existing_int_to_float(int& local_fail) {
94249    std::cout << "-- case_151_replace_existing_int_to_float\n";
94250    pandas::DataFrame df;
94251    df.add_column<std::int64_t>("v", { 1, 2, 3 });
set_column_sparse_fill_value (pd_test_5_all.cpp:23673)
23663              df.to_string(), local_fail);
23664}
23665
23666void f_set_index_col_multiindex_display_6_843271_case_12_site1_sparse_all_nan_numeric(int& local_fail) {
23667    std::cout << "----- case_12_site1_sparse_all_nan_numeric -----\n";
23668    const double NaN = std::numeric_limits<double>::quiet_NaN();
23669    pandas::DataFrame df;
23670    df.add_column<double>("SNaN", {NaN, NaN, NaN});
23671    // Mark the column as sparse with NaN fill value so is_sparse=true and
23672    // the sparse-all-NaN bump path fires.
23673    df.set_column_sparse_fill_value("SNaN", NaN, "float64");
23674    apply_default_display(df);
23675    check_str("case_12.site1_sparse_all_nan_numeric",
23676              EXPECTED_case_12_site1_sparse_all_nan_numeric,
23677              df.to_string(), local_fail);
23678}
23679
23680} // namespace f_test_set_index_col_multiindex_display_6_ns
23681
23682void f_test_set_index_col_multiindex_display_6() {
23683    std::cout << "========= f_test_set_index_col_multiindex_display_6 =======";
set_columns_index_dtype (pd_test_4_all.cpp:4985)
4975//   df.rename(columns={0: 10, 1: 20})
4976// Expected pd= block: temp/PandasPython.res:15487
4977// ============================================================================
4978void rename_int_cols() {
4979    std::map<std::string, std::vector<int64_t>> data = {
4980        {"0", {1, 2}},
4981        {"1", {3, 4}},
4982        {"2", {5, 6}},
4983    };
4984    pandas::DataFrame df(data);
4985    df.set_columns_index_dtype("int64");
4986
4987    pandas::DataFrame renamed = df.rename_columns(
4988        std::map<std::string, std::string>{{"0", "10"}, {"1", "20"}});
4989    renamed.set_columns_index_dtype("int64");
4990    apply_default_display(renamed);
4991
4992    // Expected verbatim from temp/PandasPython.res:15488-15490
4993    const std::string expected =
4994        "   10  20  2 \n"
4995        "0   1   3   5\n"
set_columns_levels (pd_test_2_all.cpp:20310)
20300    check(t == pandas::DataFrame::ColumnAccessType::DuplicateColumns, "duplicate -> DuplicateColumns");
20301}
20302
20303void pd_test_getitem_dispatch_classify_multiindex() {
20304    std::cout << "pd_test_getitem_dispatch_classify_multiindex" << std::endl;
20305    pandas::DataFrame df;
20306    std::vector<numpy::float64> v1 = {1.0, 2.0};
20307    std::vector<numpy::float64> v2 = {3.0, 4.0};
20308    df.insert(0, "A_x", std::make_unique<pandas::Series<numpy::float64>>(v1, "A_x"), true);
20309    df.insert(1, "A_y", std::make_unique<pandas::Series<numpy::float64>>(v2, "A_y"), true);
20310    df.set_columns_levels({{"A", "A"}, {"x", "y"}}, {"first", "second"});
20311
20312    auto t = df.classify_column_access("A");
20313    check(t == pandas::DataFrame::ColumnAccessType::MultiIndexGroup, "multiindex top -> MultiIndexGroup");
20314}
20315
20316// =========================================================================
20317// Extraction tests
20318// =========================================================================
20319
20320void pd_test_getitem_dispatch_extract_numeric() {
set_columns_name (pd_test_3_all.cpp:10815)
10805    std::cout << " -> tests passed" << std::endl;
10806}
10807
10808void pd_test_3_all_columns_name_setter() {
10809    std::cout << "========= DataFrame.columns_name(string) setter ========";
10810    pandas::DataFrame df;
10811    df.add_column<int64_t>("A", {1, 2, 3});
10812    df.columns_name("MyColumns");
10813    pandas::DataFrame df2;
10814    df2.add_column<int64_t>("A", {1, 2, 3});
10815    df2.set_columns_name("MyColumns");
10816    if (df.to_string() != df2.to_string()) {
10817        std::cout << "  [FAIL] : in pd_test_3_all_columns_name_setter() : mismatch" << std::endl;
10818        throw std::runtime_error("pd_test_3_all_columns_name_setter failed");
10819    }
10820    std::cout << " -> tests passed" << std::endl;
10821}
10822
10823void pd_test_3_all_column_alias() {
10824    std::cout << "========= DataFrame.column<T>() alias ==================";
10825    pandas::DataFrame df;
set_datetime_index (pd_test_3_all.cpp:1322)
1312        if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313            throw std::runtime_error("set_multi_index: shape wrong");
1314        }
1315    }
1316
1317    // G. set_datetime_index
1318    {
1319        pandas::DataFrame df;
1320        df.add_column<double>("price",
1321            {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
1322        df.set_datetime_index("2020-01-01", 10, "D");
1323        if (df.index().size() != 10) {
1324            throw std::runtime_error("set_datetime_index: size wrong");
1325        }
1326    }
1327
1328    // H. set_nan_marker
1329    {
1330        pandas::DataFrame df;
1331        std::vector<std::string> v_str = {"", "y", "z", ""};
1332        df.add_column<std::string>("v_str", v_str);
set_display_options (pd_test_4_all.cpp:118)
108}
109static std::string load_expected(const std::string& relpath) {
110    std::ifstream f(relpath, std::ios::binary);
111    if (!f) throw std::runtime_error("cannot open expected fixture: " + relpath);
112    std::string s((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
113    std::string out; out.reserve(s.size());
114    for (char ch : s) if (ch != '\r') out.push_back(ch);
115    return out;
116}
117static void apply_default_display(pandas::DataFrame& df) {
118    df.set_display_options(80, 50, 60, true);
119}
120
121void unstack_multiindex_compare_all() {
122    pandas::DataFrame df;
123    df.add_column<int64_t>("A_one", { 0, 2, 4});
124    df.add_column<int64_t>("A_two", { 1, 3, 5});
125    df.add_column<int64_t>("B_one", { 6, 8, 10});
126    df.add_column<int64_t>("B_two", { 7, 9, 11});
127
128    auto row_idx = std::make_unique<pandas::Index<std::string>>(
set_flags (pd_test_1_all.cpp:16410)
16400                throw std::runtime_error("pd_test_ndframe_series_flags failed: default allows_duplicate_labels");
16401            }
16402
16403            passed = s.flags().copy_on_write == false;
16404            if (!passed) {
16405                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : default copy_on_write" << std::endl;
16406                throw std::runtime_error("pd_test_ndframe_series_flags failed: default copy_on_write");
16407            }
16408
16409            // Test set_flags
16410            s.set_flags(pandas::Flags(false, true));
16411            passed = s.flags().allows_duplicate_labels == false;
16412            if (!passed) {
16413                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : set allows_duplicate_labels" << std::endl;
16414                throw std::runtime_error("pd_test_ndframe_series_flags failed: set allows_duplicate_labels");
16415            }
16416
16417            passed = s.flags().copy_on_write == true;
16418            if (!passed) {
16419                std::cout << "  [FAIL] : in pd_test_ndframe_series_flags() : set copy_on_write" << std::endl;
16420                throw std::runtime_error("pd_test_ndframe_series_flags failed: set copy_on_write");
set_integer_index (pd_test_3_all.cpp:1277)
1267    {
1268        pandas::DataFrame df1;
1269        df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270        df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271        if (df1.index().get_value_str(2) != "Wed") {
1272            throw std::runtime_error("set_string_index: values wrong");
1273        }
1274
1275        pandas::DataFrame df2;
1276        df2.add_column<int64_t>("val", {3, 4});
1277        df2.set_integer_index({2, 3});
1278        if (df2.index().size() != 2) {
1279            throw std::runtime_error("set_integer_index: size wrong");
1280        }
1281
1282        pandas::DataFrame df3;
1283        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285        if (df3.index().get_value_str(4) != "b") {
1286            throw std::runtime_error("set_index_from_strings: values wrong");
1287        }
set_integer_index (pd_test_3_all.cpp:1277)
1267    {
1268        pandas::DataFrame df1;
1269        df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270        df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271        if (df1.index().get_value_str(2) != "Wed") {
1272            throw std::runtime_error("set_string_index: values wrong");
1273        }
1274
1275        pandas::DataFrame df2;
1276        df2.add_column<int64_t>("val", {3, 4});
1277        df2.set_integer_index({2, 3});
1278        if (df2.index().size() != 2) {
1279            throw std::runtime_error("set_integer_index: size wrong");
1280        }
1281
1282        pandas::DataFrame df3;
1283        df3.add_column<int64_t>("val", {1, 2, 3, 4, 5});
1284        df3.set_index_from_strings({"a", "b", "c", "a", "b"});
1285        if (df3.index().get_value_str(4) != "b") {
1286            throw std::runtime_error("set_index_from_strings: values wrong");
1287        }
set_multi_index (pd_test_3_all.cpp:1311)
1301        df.index_name("year_end");  // setter form, used by 6 failing tests
1302        if (!df.index_name().has_value() || df.index_name().value() != "year_end") {
1303            throw std::runtime_error("index_name(setter): round-trip failed");
1304        }
1305    }
1306
1307    // F. set_multi_index -- two-level brace-init
1308    {
1309        pandas::DataFrame df;
1310        df.add_column<int64_t>("val", {10, 20, 30, 40});
1311        df.set_multi_index({{"A", "A", "B", "B"}, {"1", "2", "1", "2"}});
1312        if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313            throw std::runtime_error("set_multi_index: shape wrong");
1314        }
1315    }
1316
1317    // G. set_datetime_index
1318    {
1319        pandas::DataFrame df;
1320        df.add_column<double>("price",
1321            {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
set_multi_index (pd_test_3_all.cpp:1311)
1301        df.index_name("year_end");  // setter form, used by 6 failing tests
1302        if (!df.index_name().has_value() || df.index_name().value() != "year_end") {
1303            throw std::runtime_error("index_name(setter): round-trip failed");
1304        }
1305    }
1306
1307    // F. set_multi_index -- two-level brace-init
1308    {
1309        pandas::DataFrame df;
1310        df.add_column<int64_t>("val", {10, 20, 30, 40});
1311        df.set_multi_index({{"A", "A", "B", "B"}, {"1", "2", "1", "2"}});
1312        if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
1313            throw std::runtime_error("set_multi_index: shape wrong");
1314        }
1315    }
1316
1317    // G. set_datetime_index
1318    {
1319        pandas::DataFrame df;
1320        df.add_column<double>("price",
1321            {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0});
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_multiindex_names (pd_test_2_all.cpp:20775)
20765    // Simulate a 2-level groupby result with composite \x1f keys
20766    using std::string;
20767    string sep(1, '\x1f');
20768    std::vector<string> keys = {"A" + sep + "X", "A" + sep + "Y", "B" + sep + "X", "B" + sep + "Y"};
20769
20770    std::vector<numpy::float64> values = {1.0, 2.0, 3.0, 4.0};
20771    pandas::Series<std::string> by(keys);
20772    pandas::Series<numpy::float64> data(values);
20773
20774    auto sgb = data.groupby(by);
20775    sgb.set_multiindex_names({"level0", "level1"});
20776
20777    // Create a "result" series with composite index
20778    pandas::Series<numpy::float64> result(values);
20779    result.set_index(std::make_unique<pandas::Index<std::string>>(keys));
20780
20781    sgb.apply_result_index(result);
20782
20783    // Should now have a MultiIndex
20784    check(result.has_multiindex(), "has_multiindex");
20785    check(result.multiindex().nlevels() == 2, "nlevels_2");
set_nan_marker (pd_test_3_all.cpp:1334)
1324            throw std::runtime_error("set_datetime_index: size wrong");
1325        }
1326    }
1327
1328    // H. set_nan_marker
1329    {
1330        pandas::DataFrame df;
1331        std::vector<std::string> v_str = {"", "y", "z", ""};
1332        df.add_column<std::string>("v_str", v_str);
1333        df.add_column<int64_t>("v_num", {1, 2, 3, 4});
1334        df.set_nan_marker("v_str", {true, false, false, true});
1335        // exact mask-bit assertions depend on Series<string> mask API
1336    }
1337
1338    // I. set_index_from_column<T>
1339    {
1340        pandas::DataFrame df;
1341        df.add_column<int64_t>("val", {1, 3});
1342        df.add_column<int64_t>("count", {30, 70});
1343        df.set_index_from_column<std::string>("group", {"A", "B"});
1344        if (!df.index_name().has_value() || df.index_name().value() != "group") {
set_option (pd_test_3_all.cpp:10876)
10866    df.set_multiindex(mi);
10867    auto result = df.reorder_levels({"second", "first"});
10868    if (result.nrows() != 4) {
10869        std::cout << "  [FAIL] : in pd_test_3_all_reorder_levels_initlist() : wrong nrows" << std::endl;
10870        throw std::runtime_error("pd_test_3_all_reorder_levels_initlist failed");
10871    }
10872    std::cout << " -> tests passed" << std::endl;
10873}
10874
10875void pd_test_3_all_set_option_instance() {
10876    std::cout << "========= DataFrame.set_option() instance ==============";
10877    pandas::DataFrame df;
10878    df.add_column<int64_t>("A", {1, 2, 3});
10879    df.set_option("display.multi_sparse", false);
10880    df.set_option("display.unknown_key", true);  // should not throw
10881    std::cout << " -> tests passed" << std::endl;
10882}
10883
10884// ============================================================================
10885// Category 48: Indexing Fixes (Plan: plan_indexing_fixes)
10886// ============================================================================
set_string_index (pd_test_3_all.cpp:1270)
1260        df.set_index_from_list(labels);
1261        if (df.index().get_value_str(1) != "q") {
1262            throw std::runtime_error("set_index_from_list(vector): values wrong");
1263        }
1264    }
1265
1266    // D. Aliases: set_string_index / set_integer_index / set_index_from_strings
1267    {
1268        pandas::DataFrame df1;
1269        df1.add_column<double>("temp", {22.1, 23.5, 19.8, 25.0});
1270        df1.set_string_index({"Mon", "Tue", "Wed", "Thu"});
1271        if (df1.index().get_value_str(2) != "Wed") {
1272            throw std::runtime_error("set_string_index: values wrong");
1273        }
1274
1275        pandas::DataFrame df2;
1276        df2.add_column<int64_t>("val", {3, 4});
1277        df2.set_integer_index({2, 3});
1278        if (df2.index().size() != 2) {
1279            throw std::runtime_error("set_integer_index: size wrong");
1280        }
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
shape_2d (pd_test_3_all.cpp:10796)
10786    }
10787
10788    std::cout << " -> tests passed" << std::endl;
10789}
10790
10791// ============================================================================
10792// Category 47: Quick Fixes & Aliases Tests
10793// ============================================================================
10794
10795void pd_test_3_all_shape_2d() {
10796    std::cout << "========= DataFrame.shape_2d() =========================";
10797    pandas::DataFrame df;
10798    df.add_column<int64_t>("A", {1, 2, 3});
10799    df.add_column<int64_t>("B", {4, 5, 6});
10800    auto s = df.shape_2d();
10801    if (s.first != 3 || s.second != 2) {
10802        std::cout << "  [FAIL] : in pd_test_3_all_shape_2d() : wrong dimensions" << std::endl;
10803        throw std::runtime_error("pd_test_3_all_shape_2d failed");
10804    }
10805    std::cout << " -> tests passed" << std::endl;
10806}
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
sparse (pd_test_3_all.cpp:20627)
20617#include <cmath>
20618
20619#include "../pandas/pd_series.h"
20620
20621// CRITICAL: No using namespace directives
20622
20623namespace dataframe_tests {
20624namespace dataframe_tests_sparse_accessor {
20625
20626// ============================================================================
20627// Test sparse().density() and sparse().npoints()
20628// ============================================================================
20629
20630void pd_test_sparse_density_npoints() {
20631    std::cout << "========= Series.sparse().density/npoints() =============";
20632
20633    // Create a series with some zeros (sparse values)
20634    pandas::Series<numpy::float64> s({0.0, 1.0, 0.0, 2.0, 0.0, 3.0});
20635
20636    auto sparse = s.sparse(0.0);  // 0.0 is the fill value
ss (pd_test_3_all.cpp:27670)
27660            fail++;
27661        } else {
27662            auto cats = str_s->get_cat_categories();
27663            if (cats.size() != 3) {
27664                std::cout << "    FAIL: expected 3 categories, got " << cats.size() << std::endl;
27665                fail++;
27666            }
27667        }
27668    }
27669
27670    pandas::Series<std::string> ss({"a", "b", "a", "c"}, "strs");
27671    auto result2 = ss.astype("category");
27672    auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27673    if (!str_s2) {
27674        std::cout << "    FAIL: expected Series<string> for string->category" << std::endl;
27675        fail++;
27676    } else {
27677        if (str_s2->dtype_name() != "category") {
27678            std::cout << "    FAIL: dtype should be category" << std::endl;
27679            fail++;
27680        }
trim (pd_test_5_all.cpp:58873)
58863    auto trim = [](const std::string& s) {
58864        size_t a = s.find_first_not_of(" \t");
58865        size_t b = s.find_last_not_of(" \t\r");
58866        if (a == std::string::npos) return std::string();
58867        return s.substr(a, b - a + 1);
58868    };
58869    size_t pos = 0;
58870    while (pos < body.size()) {
58871        size_t nl = body.find('\n', pos);
58872        std::string raw = body.substr(pos, nl == std::string::npos ? std::string::npos : nl - pos);
58873        std::string t = trim(raw);
58874        if (!t.empty()) {
58875            // tokenise by whitespace
58876            std::vector<std::string> toks;
58877            std::string cur;
58878            for (char c : t) {
58879                if (c == ' ' || c == '\t') {
58880                    if (!cur.empty()) { toks.push_back(cur); cur.clear(); }
58881                } else {
58882                    cur.push_back(c);
58883                }
trim (pd_test_5_all.cpp:58873)
58863    auto trim = [](const std::string& s) {
58864        size_t a = s.find_first_not_of(" \t");
58865        size_t b = s.find_last_not_of(" \t\r");
58866        if (a == std::string::npos) return std::string();
58867        return s.substr(a, b - a + 1);
58868    };
58869    size_t pos = 0;
58870    while (pos < body.size()) {
58871        size_t nl = body.find('\n', pos);
58872        std::string raw = body.substr(pos, nl == std::string::npos ? std::string::npos : nl - pos);
58873        std::string t = trim(raw);
58874        if (!t.empty()) {
58875            // tokenise by whitespace
58876            std::vector<std::string> toks;
58877            std::string cur;
58878            for (char c : t) {
58879                if (c == ' ' || c == '\t') {
58880                    if (!cur.empty()) { toks.push_back(cur); cur.clear(); }
58881                } else {
58882                    cur.push_back(c);
58883                }
truncate (pd_test_1_all.cpp:20467)
20457            std::vector<std::string> dates = {
20458                "2020-01-01",
20459                "2020-01-02",
20460                "2020-01-03",
20461                "2020-01-04",
20462                "2020-01-05"
20463            };
20464            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20465
20466            // Truncate to keep only dates from 2020-01-02 to 2020-01-04
20467            pandas::DataFrame result = df.truncate("2020-01-02", "2020-01-04");
20468
20469            bool passed = (result.nrows() == 3);
20470
20471            if (!passed) {
20472                std::cout << "  [FAIL] : in pd_test_timeseries_truncate() : expected 3 rows, got "
20473                          << result.nrows() << std::endl;
20474                throw std::runtime_error("pd_test_timeseries_truncate failed");
20475            }
20476
20477            std::cout << " -> tests passed" << std::endl;
ts (pd_test_2_all.cpp:22590)
22580void test_to_datetime_numeric_seconds() {
22581    std::cout << "  -- test_to_datetime_numeric_seconds --" << std::endl;
22582    // 1490195805 seconds = 2017-03-22 15:16:45 UTC
22583    std::vector<double> vals = {1490195805.0};
22584    auto arr = pandas::to_datetime_numeric(vals, "s");
22585    check(arr.size() == 1, "size==1");
22586    auto v = arr[0];
22587    check(v.has_value(), "has_value");
22588    if (v.has_value()) {
22589        pandas::Timestamp ts(v->getValue());
22590        check(ts.year() == 2017, "year==2017");
22591        check(ts.month() == 3, "month==3");
22592        check(ts.day() == 22, "day==22");
22593        check(ts.hour() == 15, "hour==15");
22594        check(ts.minute() == 16, "min==16");
22595        check(ts.second() == 45, "sec==45");
22596    }
22597}
22598
22599void test_to_datetime_numeric_millis() {
values (pd_test_1_all.cpp:364)
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
370        };
371        pandas::CategoricalArray arr2(values);
372        if (arr2.size() != 4) {
373            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374            throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");