Series

class pandas::Series

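Series<T> is the core data container class template in the pandas namespace: it holds values of element type T together with an index and an optional name.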

Example

#include <pandas/pandas.h>
using namespace pandas;

// Create Series
Series<double> s({1.0, 2.0, 3.0}, "values");

// Statistics (mean() and std_() return std::optional<double>)
std::optional<double> mean_val = s.mean();
std::optional<double> std_val = s.std_();

Constructors

Signature

Location

Example

explicit Series(const numpy::NDArray<T>& data, const std::optional<std::string>& name = std::nullopt, const std::string& dtype = "", bool copy = false)

pd_series.h:802

View

Series(const std::vector<T>& data, const std::optional<std::string>& name = std::nullopt, const std::string& dtype = "", bool copy = false)

pd_series.h:828

View

Series(const numpy::NDArray<T>& data, const Index<IndexT>& idx, const std::optional<std::string>& name = std::nullopt)

pd_series.h:883

View

Series(const numpy::NDArray<T>& data, const RangeIndex& idx, const std::optional<std::string>& name = std::nullopt)

pd_series.h:902

View

Series(const std::vector<T>& data, const Index<IndexT>& idx, const std::optional<std::string>& name = std::nullopt)

pd_series.h:919

View

Series(const Series& other)

pd_series.h:938

View

Series(Series&& other) noexcept

pd_series.h:970

View

Construction

Signature

Return Type

Location

Example

std::unique_ptr<NDFrameBase> create_nan_filled(size_t n) const override

std::unique_ptr<NDFrameBase>

pd_series.h:11189

Indexing / Selection

Signature

Return Type

Location

Example

T at(size_t idx) const

T

pd_series.h:1550

View

Series<T> at_time( const std::string& time, bool asof = false, int axis = 0 ) const

Series<T>

pd_series.h:15902

View

Series<T> first(const std::string& offset) const

Series<T>

pd_series.h:12867

View

std::optional<size_t> first_valid_index() const

std::optional<size_t>

pd_series.h:6786

View

T get(const std::string& key, const std::optional<T>& default_value = std::nullopt) const

T

pd_series.h:12896

View

LabelLookup get_by_label_duplicates(const std::string& key) const

LabelLookup

pd_series.h:2198

View

const std::vector<std::string>& get_cat_categories() const override

const std::vector<std::string>&

pd_series.h:1248

View

std::string get_cat_categories_dtype() const override

std::string

pd_series.h:1269

numpy::datetime64 get_datetime(size_t i) const

numpy::datetime64

pd_series.h:10454

std::optional<std::string> get_freq() const

std::optional<std::string>

pd_series.h:10544

View

std::optional<T> get_optional(size_t i) const

std::optional<T>

pd_series.h:1300

View

Series<T> get_slice_as_series(size_t start, size_t stop) const

Series<T>

pd_series.h:2238

View

std::optional<double> get_sparse_fill_value() const

std::optional<double>

pd_series.h:10535

std::string get_string(size_t i) const

std::string

pd_series.h:10420

View

bool get_value_bool(size_t idx) const override

bool

pd_series.h:2947

View

double get_value_double(size_t idx) const override

double

pd_series.h:10290

View

std::string get_value_str(size_t idx) const override

std::string

pd_series.h:9264

View

<< get_value_str(i)

<<

pd_series.h:10946

View

Series<T> head(size_t n = 5) const

Series<T>

pd_series.h:2494

View

T iat(size_t idx) const

T

pd_series.h:1562

View

T iat_resolved(std::int64_t idx) const

T

pd_series.h:2035

View

std::string idxmax() const

std::string

pd_series.h:3913

View

std::pair<bool, std::pair<int64_t, std::string>> idxmax_typed() const

std::pair<bool, std::pair<int64_t, std::string>>

pd_series.h:3921

View

std::string idxmin() const

std::string

pd_series.h:3906

View

std::pair<bool, std::pair<int64_t, std::string>> idxmin_typed() const

std::pair<bool, std::pair<int64_t, std::string>>

pd_series.h:3934

View

Series<T> iloc(size_t start, size_t stop, size_t step = 1) const

Series<T>

pd_series.h:2052

View

Series<T> iloc(const std::vector<size_t>& indices) const

Series<T>

pd_series.h:2115

View

Series<T> last(const std::string& offset) const

Series<T>

pd_series.h:12882

View

std::optional<size_t> last_valid_index() const

std::optional<size_t>

pd_series.h:6806

View

T loc(const std::string& label) const

T

pd_series.h:2269

View

Series<T> loc(const std::vector<std::string>& labels) const

Series<T>

pd_series.h:2282

View

Series<T> loc_slice(const pandas::SliceSpec& spec) const

Series<T>

pd_series.h:1908

View

pandas::StringLookupResult loc_string(const std::string& key) const

pandas::StringLookupResult

pd_series.h:1602

View

pandas::LookupResult<T> loc_timedelta(const pandas::Timedelta& key) const

pandas::LookupResult<T>

pd_series.h:1786

View

pandas::LookupResult<T> loc_timestamp(const pandas::Timestamp& key) const

pandas::LookupResult<T>

pd_series.h:1749

View

const numpy::NDArray<numpy::bool_>& mask() const

const numpy::NDArray<numpy::bool_>&

pd_series.h:1293

View

Series<T> mask(const numpy::NDArray<numpy::bool_>& cond, const T& other, int axis = 0, bool inplace = false, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:2458

View

bool mask_at(size_t i) const override

bool

pd_series.h:1295

View

Series<T> nlargest(size_t n = 5, const std::string& keep = "first") const

Series<T>

pd_series.h:13644

View

Series<T> nsmallest(size_t n = 5, const std::string& keep = "first") const

Series<T>

pd_series.h:13700

View

Series<T> sample( std::optional<size_t> n = std::nullopt, std::optional<double> frac = std::nullopt, bool replace = false, const std::vector<double>& weights = {}, std::optional<unsigned int> random_state = std::nullopt, int axis = 0, bool ignore_index = false) const

Series<T>

pd_series.h:15536

View

void set_value_double(size_t idx, double value) override

void

pd_series.h:2997

void set_value_nan(size_t idx) override

void

pd_series.h:2971

View

void set_value_str(size_t idx, const std::string& value) override

void

pd_series.h:3011

bool set_value_with_enlarge(const std::string& label, T value)

bool

pd_series.h:3041

View

Series<T> tail(size_t n = 5) const

Series<T>

pd_series.h:2502

View

Series<T> take(const std::vector<size_t>& indices, int axis = 0) const

Series<T>

pd_series.h:2512

View

std::unique_ptr<NDFrameBase> take_indices(const std::vector<size_t>& indices) const override

std::unique_ptr<NDFrameBase>

pd_series.h:11546

Series<T> where(const numpy::NDArray<numpy::bool_>& cond, const T& other, int axis = 0, bool inplace = false, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:2375

View

Series<T> where(const numpy::NDArray<numpy::bool_>& cond, const Series<T>& other) const

Series<T>

pd_series.h:2409

View

Result where_resolved_typed( const pandas::Series<numpy::bool_>& cond, const pandas::FillValue& other) const

Result

pd_series.h:16301

View

Series<T> where_t_or_default(const numpy::NDArray<numpy::bool_>& cond, const pandas::FillValue& fv) const

Series<T>

pd_series.h:16340

Series<T> xs(const std::string& key, int level = 0, int axis = 0, bool drop_level = true) const

Series<T>

pd_series.h:15785

View

Data Manipulation

Signature

Return Type

Location

Example

Series<T> drop(const std::vector<std::string>& labels, int axis = 0, const std::optional<std::vector<std::string>>& index = std::nullopt, const std::optional<std::vector<std::string>>& columns = std::nullopt, std::optional<int> level = std::nullopt, bool inplace = false, const std::string& errors = "raise") const

Series<T>

pd_series.h:12445

View

Series<T> drop_duplicates(const std::string& keep = "first", bool inplace = false, bool ignore_index = false) const

Series<T>

pd_series.h:5712

View

Series<T> droplevel(int level, int axis = 0) const

Series<T>

pd_series.h:12485

View

Series<T> dropna() const

Series<T>

pd_series.h:2754

View

T pop(size_t pos)

T

pd_series.h:12744

View

T pop(const std::string& label)

T

pd_series.h:12805

View

Series<T> reindex( const std::vector<std::string>& index, const std::string& method = "", const std::optional<T>& fill_value = std::nullopt, int axis = 0, bool copy = true, std::optional<int> level = std::nullopt, std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt, const std::vector<double>& new_numeric_index = {}, const std::vector<double>& old_numeric_index = {}) const

Series<T>

pd_series.h:13996

View

Result reindex_dispatch( const std::vector<std::string>& labels, const std::string& method, const FillValue& fill, const std::optional<double>& tolerance = std::nullopt, std::optional<int> limit = std::nullopt, const std::vector<double>& new_numeric_index = {}, const std::vector<double>& old_numeric_index = {} ) const

Result

pd_series.h:14808

View

Series<T> reindex_like( const Series<T>& other, const std::string& method = "", const std::optional<T>& fill_value = std::nullopt, bool copy = true, std::optional<int> limit = std::nullopt, std::optional<double> tolerance = std::nullopt) const

Series<T>

pd_series.h:15389

View

Series<std::string> reindex_promote_object_bool_fill( const std::vector<std::string>& index, const std::string& method, bool bool_fill_value) const

Series<std::string>

pd_series.h:14818

View

Series<std::string> reindex_promote_object_numeric_fill( const std::vector<std::string>& index, const std::string& method, double fill_num, bool fill_is_python_int = false) const

Series<std::string>

pd_series.h:15097

Series<std::string> reindex_promote_object_string_fill( const std::vector<std::string>& index, const std::string& method, const std::string& string_fill_value) const

Series<std::string>

pd_series.h:15340

Result reindex_with_fill( const std::vector<std::string>& labels, const std::string& method, const pandas::FillValue& fill, const std::optional<double>& tolerance = std::nullopt, std::optional<int> limit = std::nullopt, const std::vector<double>& new_numeric_idx = {}, const std::vector<double>& old_numeric_idx = {}) const

Result

pd_series.h:16284

View

std::unique_ptr<NDFrameBase> reindex_with_indexer(const numpy::NDArray<numpy::int64>& indexer) const override

std::unique_ptr<NDFrameBase>

pd_series.h:11555

View

Series<T> rename(const std::optional<std::string>& new_name, const std::optional<std::map<std::string, std::string>>& index = std::nullopt, int axis = 0, bool copy = true, bool inplace = false, std::optional<int> level = std::nullopt, const std::string& errors = "ignore") const

Series<T>

pd_series.h:8897

View

Series<T> rename_axis( const std::optional<std::string>& mapper = std::nullopt, const std::optional<std::string>& index = std::nullopt, int axis = 0, [[maybe_unused]] bool copy = true, [[maybe_unused]] bool inplace = false) const

Series<T>

pd_series.h:9020

View

Series<T> reorder_levels(const std::vector<int>& order) const

Series<T>

pd_series.h:15413

View

Series<T> replace( const T& to_replace, const T& value, bool inplace = false, std::optional<size_t> limit = std::nullopt, bool regex = false, const std::string& method = "") const

Series<T>

pd_series.h:15460

View

Series<T> replace( const std::map<T, T>& replacements, bool inplace = false, std::optional<size_t> limit = std::nullopt, bool regex = false, const std::string& method = "") const

Series<T>

pd_series.h:15498

View

void replace_value(double to_replace, double value) override

void

pd_series.h:2922

Series<T>& reset_index(bool drop = false, std::optional<int> level = std::nullopt, bool inplace = false, std::optional<int> col_level = std::nullopt, const std::string& col_fill = "", bool allow_duplicates = false, const std::optional<std::vector<std::string>>& names = std::nullopt)

Series<T>&

pd_series.h:1439

View

void reset_index_with_name(bool drop = false, const std::optional<std::string>& name = std::nullopt, std::optional<int> level = std::nullopt, bool inplace = false, std::optional<int> col_level = std::nullopt, const std::string& col_fill = "", bool allow_duplicates = false, const std::optional<std::vector<std::string>>& names = std::nullopt)

void

pd_series.h:1470

Series<T> set_axis(const std::vector<std::string>& labels, int axis = 0, bool copy = true) const

Series<T>

pd_series.h:8994

View

void set_index(std::unique_ptr<IndexBase> new_index) override

void

pd_series.h:1403

View

void set_index(const Index<IndexT>& new_index)

void

pd_series.h:1417

View

Series<T> swaplevel(int i = -2, int j = -1, bool copy = true) const

Series<T>

pd_series.h:15703

View

void update(const Series<T>& other)

void

pd_series.h:15754

View

Missing Data

Signature

Return Type

Location

Example

Series<T> backfill( int axis = 0, bool inplace = false, std::optional<size_t> limit = std::nullopt, const std::string& downcast = "") const

Series<T>

pd_series.h:11928

View

Series<T> bfill( int axis = 0, bool inplace = false, std::optional<size_t> limit = std::nullopt, const std::string& limit_area = "", const std::string& downcast = "") const

Series<T>

pd_series.h:11842

View

Series<T> ffill( int axis = 0, bool inplace = false, std::optional<size_t> limit = std::nullopt, const std::string& limit_area = "", const std::string& downcast = "") const

Series<T>

pd_series.h:11947

View

Series<T> fillna( const T& value, const std::string& method = "", int axis = 0, bool inplace = false, std::optional<size_t> limit = std::nullopt, const std::string& downcast = "") const

Series<T>

pd_series.h:2653

View

void fillna_double(double value) override

void

pd_series.h:2834

Result fillna_resolved(const pandas::FillValue& fv) const

Result

pd_series.h:16299

View

void fillna_string(const std::string& value) override

void

pd_series.h:2896

View

Series<T> fillna_t_or_default(const pandas::FillValue& fv) const

Series<T>

pd_series.h:16336

Series<T> interpolate( const std::string& method = "linear", int axis = 0, std::optional<size_t> limit = std::nullopt, bool inplace = false, const std::string& limit_direction = "forward", const std::string& limit_area = "", const std::string& downcast = "") const

Series<T>

pd_series.h:12950

View

numpy::NDArray<numpy::bool_> isna() const

numpy::NDArray<numpy::bool_>

pd_series.h:2580

View

numpy::NDArray<numpy::bool_> isnull() const

numpy::NDArray<numpy::bool_>

pd_series.h:13520

View

numpy::NDArray<numpy::bool_> notna() const

numpy::NDArray<numpy::bool_>

pd_series.h:2610

View

numpy::NDArray<numpy::bool_> notnull() const

numpy::NDArray<numpy::bool_>

pd_series.h:13527

View

Series<T> pad( int axis = 0, bool inplace = false, std::optional<size_t> limit = std::nullopt, const std::string& downcast = "") const

Series<T>

pd_series.h:12012

View

Statistics

Signature

Return Type

Location

Example

size_t count() const override

size_t

pd_series.h:2545

View

Series<T> cummax(int axis = 0, bool skipna = true) const

Series<T>

pd_series.h:5107

View

Series<T> cummin(int axis = 0, bool skipna = true) const

Series<T>

pd_series.h:5080

View

Series<T> cumprod(int axis = 0, bool skipna = true) const

Series<T>

pd_series.h:5020

View

Series<T> cumsum(int axis = 0, bool skipna = true) const

Series<T>

pd_series.h:4949

View

DataFrame describe( const std::vector<double>& percentiles = {0.25, 0.5, 0.75}, const std::string& include = "all", const std::string& exclude = "" ) const

DataFrame

pd_series.h:3742

View

Series<numpy::float64> describe_as_series( const std::vector<double>& percentiles = {0.25, 0.5, 0.75} ) const

Series<numpy::float64>

pd_series.h:3754

View

std::optional<double> kurt(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3578

View

std::optional<double> kurtosis(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3620

View

std::optional<T> max(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<T>

pd_series.h:3373

View

int max_decimal_places() const override

int

pd_series.h:9092

std::optional<double> mean(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3229

View

std::optional<T> median(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<T>

pd_series.h:3469

View

std::optional<T> min(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<T>

pd_series.h:3331

View

Series<T> mode(bool dropna = true) const

Series<T>

pd_series.h:3629

View

size_t nunique(bool dropna = true) const

size_t

pd_series.h:5645

View

std::optional<T> prod(bool skipna = true, int axis = 0, bool numeric_only = false, std::optional<int> min_count = std::nullopt) const

std::optional<T>

pd_series.h:3416

View

std::optional<T> product(bool skipna = true, int axis = 0, bool numeric_only = false, std::optional<int> min_count = std::nullopt) const

std::optional<T>

pd_series.h:13757

View

std::optional<double> quantile(double q, const std::string& interpolation = "linear") const

std::optional<double>

pd_series.h:3696

View

Series<numpy::float64> quantile_list( const std::vector<double>& q_vals, const std::string& interpolation = "linear" ) const

Series<numpy::float64>

pd_series.h:3764

View

std::optional<double> sem(bool skipna = true, int ddof = 1, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3519

View

std::optional<double> skew(bool skipna = true, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3538

View

std::optional<double> std_(bool skipna = true, int ddof = 1, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3256

View

auto sum(bool skipna = true, int axis = 0, bool numeric_only = false, std::optional<int> min_count = std::nullopt) const

auto

pd_series.h:3113

View

Series<numpy::int64> value_counts(bool normalize = false, bool sort = true, bool ascending = false, bool dropna = true, std::optional<int> bins = std::nullopt) const

Series<numpy::int64>

pd_series.h:5745

View

std::optional<double> var(bool skipna = true, int ddof = 1, int axis = 0, bool numeric_only = false) const

std::optional<double>

pd_series.h:3270

View

Aggregation

Signature

Return Type

Location

Example

auto agg(Func&& func, int axis = 0) const -> decltype(func(std::declval<std::vector<T>>()))

auto

pd_series.h:6343

View

std::optional<double> agg(const std::string& func, int axis = 0) const

std::optional<double>

pd_series.h:6369

View

DataFrame agg(const std::vector<std::string>& funcs, int axis = 0) const

DataFrame

pd_series.h:6423

View

Result agg_with_dtype(const std::vector<std::string>& funcs) const

Result

pd_series.h:16297

View

auto aggregate(Func&& func, int axis = 0) const -> decltype(agg(std::forward<Func>(func), axis))

auto

pd_series.h:6431

View

std::optional<double> aggregate(const std::string& func, int axis = 0) const

std::optional<double>

pd_series.h:6440

View

DataFrame aggregate(const std::vector<std::string>& funcs, int axis = 0) const

DataFrame

pd_series.h:6452

View

auto apply(Func&& func, [[maybe_unused]] bool convert_dtype = true, [[maybe_unused]] const std::string& by_row = "compat") const

auto

pd_series.h:5867

View

auto apply(Func&& func, [[maybe_unused]] bool convert_dtype, std::tuple<Args...> args, [[maybe_unused]] const std::string& by_row = "compat") const

auto

pd_series.h:5909

View

Series<double> apply(const std::string& func, [[maybe_unused]] bool convert_dtype = true, [[maybe_unused]] const std::vector<double>& args = {}) const

Series<double>

pd_series.h:5991

View

Result apply_dispatch( const FuncArg& func, const std::function<ApplyCellResult(const MapCellInput&)>& cell_cb = {} ) const

Result

pd_series.h:5986

View

Series<T> apply_ns_transform(Fn&& transform, const std::string& result_dtype) const

Series<T>

pd_series.h:4351

View

static void apply_override_to_result(Result& r, const std::string& override_dtype)

static void

pd_series.h:16328

Result apply_resolved_typed( const std::function<pandas::ApplyCellResult(const pandas::MapCellInput&)>& cb, pandas::ApplyResultInference::ScalarKindHistogram& hist) const

Result

pd_series.h:16293

View

apply_scientific_notation(val_strs, raw_vals)

pd_series.h:10682

apply_scientific_notation(val_strs, raw_vals)

pd_series.h:11058

auto apply_with_args(Func&& func, std::tuple<Args...> args, [[maybe_unused]] bool convert_dtype = true, [[maybe_unused]] const std::string& by_row = "compat") const

auto

pd_series.h:5942

View

EWM<T> ewm( std::optional<double> com = std::nullopt, std::optional<double> span = std::nullopt, std::optional<double> halflife = std::nullopt, std::optional<double> alpha = std::nullopt, size_t min_periods = 0, bool adjust = true, bool ignore_na = false, int axis = 0, const std::string& times = "", const std::string& method = "single") const

EWM<T>

pd_series.h:6537

View

EWM<T> ewm_full( std::optional<double> com = std::nullopt, std::optional<double> span = std::nullopt, std::optional<double> halflife = std::nullopt, std::optional<double> alpha = std::nullopt, size_t min_periods = 0, bool adjust = true, bool ignore_na = false, int axis = 0, const std::string& times = "", const std::string& method = "single") const

EWM<T>

pd_series.h:6576

View

EWM<T> ewm_span(double span, bool adjust = true, bool ignore_na = false) const

EWM<T>

pd_series.h:6558

View

Expanding<T> expanding( size_t min_periods = 1, int axis = 0, const std::string& method = "single") const

Expanding<T>

pd_series.h:6510

View

SeriesGroupBy<T, GroupT> groupby(const Series<GroupT>& by, bool sort = true, int axis = 0, std::optional<int> level = std::nullopt, bool as_index = true, bool group_keys = true, bool observed = false, bool dropna = true) const

SeriesGroupBy<T, GroupT>

pd_series.h:6620

View

SeriesGroupBy<T, std::string> groupby_by_callable( const std::function<std::string(size_t, const std::string&, bool)>& convert, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:4376

View

SeriesGroupBy<T, std::string> groupby_by_categorical( const CategoricalArray& cat, bool sort = true, bool observed = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6668

View

SeriesGroupBy<T, std::string> groupby_by_index( bool sort = true, bool observed = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6649

View

SeriesGroupBy<T, std::string> groupby_by_labels( const std::vector<std::string>& labels, const std::string& grouper_dtype = "object", bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6688

View

SeriesGroupBy<T, std::string> groupby_by_level( const std::vector<size_t>& level_indices, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6631

View

SeriesGroupBy<T, std::string> groupby_by_level( size_t level_idx, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6640

View

SeriesGroupBy<T, std::string> groupby_by_level_names( const std::vector<std::string>& level_names, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6678

View

SeriesGroupBy<T, std::string> groupby_by_numeric( const Series<numpy::float64>& by_numeric, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6658

View

SeriesGroupBy<T, std::string> groupby_by_string_series( const Series<std::string>& by_str, bool sort = true) const

SeriesGroupBy<T, std::string>

pd_series.h:6699

View

Series<U> map( const std::unordered_map<T, U>& mapping, const std::string& na_action = "") const

Series<U>

pd_series.h:6038

View

auto map( Func&& func, const std::string& na_action = "") const -> Series<decltype(func(std::declval<T>()))>

auto

pd_series.h:6230

View

Series<T> map_dict(const std::map<T, T>& mapping) const

Series<T>

pd_series.h:6101

View

Result map_dict_resolved(const std::map<Key, Value>& m) const

Result

pd_series.h:16316

View

Series<T> map_series(const Series<T>& lookup) const

Series<T>

pd_series.h:6134

View

Result map_series_resolved(const pandas::Series<U>& mapper) const

Result

pd_series.h:16319

View

Series<std::string> map_to_string(const Series<std::string>& lookup) const

Series<std::string>

pd_series.h:6184

View

auto pipe(Func&& func, Args&&... args) const

auto

pd_series.h:6458

View

SeriesResampler<T> resample(const std::string& freq, const std::string& closed = "", const std::string& label = "", int axis = 0, const std::string& convention = "start", const std::string& kind = "", const std::optional<std::string>& on = std::nullopt, std::optional<int> level = std::nullopt, const std::string& origin = "start_day", std::optional<int64_t> offset = std::nullopt, bool group_keys = false) const

SeriesResampler<T>

pd_series.h:6748

View

Rolling<T> rolling( size_t window, size_t min_periods = 1, bool center = false, const std::string& win_type = "", const std::string& on = "", int axis = 0, const std::string& closed = "right", size_t step = 1, const std::string& method = "single") const

Rolling<T>

pd_series.h:6486

View

Series<T> transform(const std::string& func, int axis = 0) const

Series<T>

pd_series.h:6281

View

Series<T> transform(Func&& func, int axis = 0) const

Series<T>

pd_series.h:6318

View

DataFrame transform_named_list(const std::vector<std::string>& func_names) const

DataFrame

pd_series.h:6309

View

Result transform_resolved( const std::function<pandas::ApplyCellResult(const pandas::MapCellInput&)>& cb, pandas::ApplyResultInference::ScalarKindHistogram& hist) const

Result

pd_series.h:16321

View

Arithmetic

Signature

Return Type

Location

Example

Series<T> add(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4208

View

Series<T> add(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4255

View

Series<T> add_dateoffset(const pandas::DateOffset& off) const

Series<T>

pd_series.h:4298

View

Series<T> add_dateoffset_to_timedelta(const pandas::DateOffset& off) const

Series<T>

pd_series.h:4340

View

Series<T> add_int64(int64_t scalar_ns) const

Series<T>

pd_series.h:4265

Series<T> add_prefix(const std::string& prefix, int axis = 0) const

Series<T>

pd_series.h:8941

View

Series<T> add_suffix(const std::string& suffix, int axis = 0) const

Series<T>

pd_series.h:8970

View

Series<numpy::float64> div(const Series<T>& other, T fill_value = T{1}, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4487

View

Series<numpy::float64> div(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4519

View

Series<numpy::float64> divide(const Series<T>& other, T fill_value = T{1}, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4537

View

std::pair<Series<T>, Series<T>> divmod(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

std::pair<Series<T>, Series<T>>

pd_series.h:12352

View

std::pair<Series<T>, Series<T>> divmod(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

std::pair<Series<T>, Series<T>>

pd_series.h:12376

View

T dot(const Series<T>& other) const

T

pd_series.h:12410

View

T dot(const numpy::NDArray<T>& other) const

T

pd_series.h:12422

View

Series<T> floordiv(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4567

View

Series<T> floordiv(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4591

View

Series<T> mod(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4615

View

Series<T> mod(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4636

View

Series<T> mul(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4435

View

Series<T> mul(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4469

View

const MultiIndex& multiindex() const

const MultiIndex&

pd_series.h:1521

View

Series<T> multiply(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4475

View

Series<numpy::float64> pow(const Series<T>& other, std::optional<double> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4660

View

Series<numpy::float64> pow(double exponent, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4676

View

Series<T> radd(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13771

View

Series<T> radd(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13777

View

Series<numpy::float64> rdiv(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:13840

View

std::pair<Series<T>, Series<T>> rdivmod(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

std::pair<Series<T>, Series<T>>

pd_series.h:13949

View

Series<T> rfloordiv(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13876

View

Series<T> rmod(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13902

View

Series<T> rmul(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13819

View

Series<T> rmul(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13825

View

Series<numpy::float64> rpow(double other, std::optional<double> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:13928

View

Series<T> rsub(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13792

View

Series<T> rsub(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:13804

View

Series<numpy::float64> rtruediv(const T& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:13862

View

Series<T> sub(const Series<T>& other, T fill_value = T{}, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4387

View

Series<T> sub(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4417

View

Series<T> sub_dateoffset(const pandas::DateOffset& off) const

Series<T>

pd_series.h:4326

View

Series<T> sub_int64(int64_t scalar_ns) const

Series<T>

pd_series.h:4289

Series<T> subtract(const Series<T>& other, T fill_value = T{}, int axis = 0, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:4423

View

Series<numpy::float64> truediv(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4549

View

Series<numpy::float64> truediv(T scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::float64>

pd_series.h:4556

View

Comparison

Signature

Return Type

Location

Example

DataFrame compare( const Series<T>& other, int align_axis = 1, bool keep_shape = false, bool keep_equal = false, const std::pair<std::string, std::string>& result_names = {"self", "other"} ) const

DataFrame

pd_series.h:12194

View

Series<numpy::bool_> eq(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12505

View

Series<numpy::bool_> eq(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12513

View

bool equals(const Series<T>& other) const

bool

pd_series.h:12637

View

Series<numpy::bool_> ge(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12614

View

Series<numpy::bool_> ge(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12622

View

std::mt19937 gen(seed_val == 0 ? std::random_device{}() : seed_val)

std::mt19937

pd_series.h:15561

View

Series<numpy::bool_> gt(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12593

View

Series<numpy::bool_> gt(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12601

View

Series<numpy::bool_> le(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12568

View

Series<numpy::bool_> le(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12576

View

std::vector<std::vector<std::string>> level_arrays(nlevels)

std::vector<std::vector<std::string>>

pd_series.h:2069

std::vector<std::vector<std::string>> level_arrays(nlevels)

std::vector<std::vector<std::string>>

pd_series.h:2131

Series<numpy::bool_> lt(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12547

View

Series<numpy::bool_> lt(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12555

View

Series<numpy::bool_> ne(const Series<T>& other, std::optional<T> fill_value = std::nullopt, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12526

View

Series<numpy::bool_> ne(const T& scalar, int axis = 0, std::optional<int> level = std::nullopt) const

Series<numpy::bool_>

pd_series.h:12534

View

Sorting#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::int64> argsort(int axis = 0, const std::string& kind = "quicksort", const std::string& order = "", bool ascending = true, bool stable = false) const

numpy::NDArray<numpy::int64>

pd_series.h:5202

View

Series<numpy::float64> rank(const std::string& method = "average", bool ascending = true, const std::string& na_option = "keep", bool pct = false, int axis = 0, bool numeric_only = false) const

Series<numpy::float64>

pd_series.h:5480

View

static std::vector<double> rank_values(const std::vector<double>& values)

static std::vector<double>

pd_series.h:16005

size_t searchsorted(const T& value, const std::string& side = "left", const std::vector<size_t>& sorter = {}) const

size_t

pd_series.h:15630

View

Series<T> sort_index(bool ascending = true, int axis = 0, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool sort_remaining = true, bool ignore_index = false, std::function<std::string(const std::string&)> key = nullptr, std::optional<int> level = std::nullopt) const

Series<T>

pd_series.h:5348

View

Series<T> sort_values( bool ascending = true, bool inplace = false, const std::string& kind = "quicksort", const std::string& na_position = "last", bool ignore_index = false, std::function<T(const T&)> key = nullptr, int axis = 0) const

Series<T>

pd_series.h:5244

View

Reshaping#

Signature

Return Type

Location

Example

Series<T> T_() const

Series<T>

pd_series.h:15746

View

Series<T> explode(bool ignore_index = false) const

Series<T>

pd_series.h:8857

View

Series<T> squeeze() const

Series<T>

pd_series.h:15680

View

Series<T> swapaxes(int axis1, int axis2, bool copy = true) const

Series<T>

pd_series.h:15722

View

DataFrame to_frame(const std::optional<std::string>& name = std::nullopt) const

DataFrame

pd_series.h:8240

View

Series<T> transpose() const

Series<T>

pd_series.h:15739

View

DataFrame unstack( int level = -1, std::optional<T> fill_value = std::nullopt, bool sort = true ) const

DataFrame

pd_series.h:8881

View

Combining#

Signature

Return Type

Location

Example

std::pair<Series<T>, Series<T>> align( const Series<T>& other, const std::string& join = "outer", int axis = 0, std::optional<int> level = std::nullopt, bool copy = true, const std::optional<T>& fill_value = std::nullopt, const std::string& method = "", std::optional<int> limit = std::nullopt, int fill_axis = 0, std::optional<int> broadcast_axis = std::nullopt) const

std::pair<Series<T>, Series<T>>

pd_series.h:11650

View

Series<numpy::float64> aligned_binary_op( const Series<T>& other, BinaryOp op) const

Series<numpy::float64>

pd_series.h:16102

View

Series<numpy::float64> aligned_binary_op_cross( const Series<U>& other, BinaryOp op) const

Series<numpy::float64>

pd_series.h:16188

View

Series<T> combine( const Series<T>& other, Func&& func, const std::optional<T>& fill_value = std::nullopt) const

Series<T>

pd_series.h:12086

View

Series<T> combine_first(const Series<T>& other) const

Series<T>

pd_series.h:12115

View

static Series<T> concat( const std::vector<Series<T>>& series, bool ignore_index = false, Series<numpy::float64>* out_float = nullptr)

static Series<T>

pd_series.h:11386

View

std::unique_ptr<NDFrameBase> concat_with(const NDFrameBase& other) const override

std::unique_ptr<NDFrameBase>

pd_series.h:11215

Time Series#

Signature

Return Type

Location

Example

Series<T> asfreq( const std::string& freq, const std::string& method = "", const std::string& how = "", bool normalize = false, const std::optional<T>& fill_value = std::nullopt) const

Series<T>

pd_series.h:6771

View

std::optional<T> asof(const std::string& where, const std::vector<std::string>& subset = {}) const

std::optional<T>

pd_series.h:11745

View

T asof(size_t where) const

T

pd_series.h:15832

View

Series<T> asof(const std::vector<size_t>& where) const

Series<T>

pd_series.h:15879

View

Series<T> between_time( const std::string& start_time, const std::string& end_time, const std::string& inclusive = "both", int axis = 0 ) const

Series<T>

pd_series.h:15929

View

Series<double> diff(int periods = 1) const

Series<double>

pd_series.h:5132

View

Series<numpy::float64> pct_change( int periods = 1, const std::string& fill_method = "", std::optional<size_t> limit = std::nullopt, const std::string& freq = "") const

Series<numpy::float64>

pd_series.h:3794

View

Series<T> shift(int periods = 1, const std::optional<T>& fill_value = std::nullopt, int axis = 0, const std::string& freq = "", const std::string& suffix = "") const

Series<T>

pd_series.h:5158

View

Series<T> to_period(const std::string& freq = "", bool copy = true) const

Series<T>

pd_series.h:8260

View

Series<T> to_timestamp(const std::string& freq = "", const std::string& how = "start", bool copy = true) const

Series<T>

pd_series.h:8281

View

Series<T> tz_convert(const std::string& tz, int axis = 0, std::optional<int> level = std::nullopt, bool copy = true) const

Series<T>

pd_series.h:15958

View

Series<T> tz_localize( const std::string& tz, int axis = 0, std::optional<int> level = std::nullopt, bool copy = true, const std::string& ambiguous = "raise", const std::string& nonexistent = "raise" ) const

Series<T>

pd_series.h:15984

View

I/O#

Signature

Return Type

Location

Example

void to_clipboard(bool excel = true, const std::string& sep = "\t") const

void

pd_series.h:8579

View

std::string to_csv( bool index = true, char sep = ',', [[maybe_unused]] const std::string& na_rep = "", [[maybe_unused]] const std::string& float_format = "", [[maybe_unused]] const std::vector<std::string>& columns = {}, bool header = true, [[maybe_unused]] const std::string& index_label = "", [[maybe_unused]] const std::string& mode = "w", [[maybe_unused]] const std::string& encoding = "utf-8", [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] int quoting = 0, [[maybe_unused]] char quotechar = '"', const std::string& lineterminator = "\n", [[maybe_unused]] bool doublequote = true, [[maybe_unused]] const std::string& escapechar = "", [[maybe_unused]] std::optional<size_t> chunksize = std::nullopt, [[maybe_unused]] const std::string& date_format = "", [[maybe_unused]] const std::string& errors = "strict", [[maybe_unused]] char decimal = '.', [[maybe_unused]] const std::string& storage_options = "", [[maybe_unused]] const std::string& path_or_buf = "") const

std::string

pd_series.h:8082

View

std::map<std::string, T> to_dict() const

std::map<std::string, T>

pd_series.h:8221

View

void to_excel( const std::string& excel_writer, const std::string& sheet_name = "Sheet1", const std::string& na_rep = "", const std::string& float_format = "", bool header = true, bool index = true, const std::string& index_label = "", size_t startrow = 0, size_t startcol = 0, bool merge_cells = true, const std::string& inf_rep = "inf", std::optional<std::pair<size_t, size_t>> freeze_panes = std::nullopt, [[maybe_unused]] const std::vector<std::string>& columns = {}, [[maybe_unused]] const std::string& engine = "openpyxl", [[maybe_unused]] const std::map<std::string, std::string>& engine_kwargs = {}, [[maybe_unused]] const std::string& storage_options = "") const

void

pd_series.h:8720

View

std::vector<uint8_t> to_feather( const std::string& path = "", const std::string& compression = "uncompressed", int compression_level = -1, int64_t chunksize = -1, int version = 2) const

std::vector<uint8_t>

pd_series.h:8817

View

void to_hdf( const std::string& path_or_buf, const std::string& key = "data", const std::string& mode = "a", std::optional<int> complevel = std::nullopt, const std::string& complib = "zlib", bool append = false, const std::string& format = "fixed", bool index = true, [[maybe_unused]] const std::vector<std::string>& data_columns = {}, [[maybe_unused]] bool dropna = false, [[maybe_unused]] const std::string& encoding = "utf-8", [[maybe_unused]] const std::string& errors = "strict", [[maybe_unused]] const std::map<std::string, size_t>& min_itemsize = {}, [[maybe_unused]] const std::string& nan_rep = "") const

void

pd_series.h:8643

View

std::string to_json( [[maybe_unused]] const std::string& path_or_buf = "", const std::string& orient = "index", [[maybe_unused]] const std::string& date_format = "epoch", [[maybe_unused]] int double_precision = 10, [[maybe_unused]] bool force_ascii = true, [[maybe_unused]] const std::string& date_unit = "ms", [[maybe_unused]] const std::string& default_handler = "", [[maybe_unused]] bool lines = false, [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] bool index = true, [[maybe_unused]] int indent = 0, [[maybe_unused]] const std::string& storage_options = "", [[maybe_unused]] const std::string& mode = "w") const

std::string

pd_series.h:8146

View

std::string to_latex( bool header = true, bool index = true, [[maybe_unused]] const std::string& na_rep = "NaN", [[maybe_unused]] const std::string& float_format = "", [[maybe_unused]] const std::map<std::string, std::function<std::string(const T&)>>& formatters = {}, [[maybe_unused]] bool bold_rows = false, [[maybe_unused]] const std::string& column_format = "", [[maybe_unused]] bool longtable = false, [[maybe_unused]] bool escape = true, [[maybe_unused]] bool sparsify = true, [[maybe_unused]] bool index_names = true, [[maybe_unused]] char decimal = '.', [[maybe_unused]] bool multicolumn = true, [[maybe_unused]] const std::string& multicolumn_format = "l", [[maybe_unused]] bool multirow = false, [[maybe_unused]] const std::string& caption = "", [[maybe_unused]] const std::string& label = "", [[maybe_unused]] const std::string& position = "", [[maybe_unused]] std::ostream* buf = nullptr, [[maybe_unused]] const std::vector<std::string>& columns = {}, [[maybe_unused]] const std::string& encoding = "utf-8") const

std::string

pd_series.h:8380

View

void to_latex(const std::string& path, bool header = true, bool index = true) const

void

pd_series.h:8441

View

file << to_latex(header, index)

pd_series.h:8446

View

std::vector<T> to_list() const

std::vector<T>

pd_series.h:8012

View

std::string to_markdown( bool index = true, [[maybe_unused]] std::ostream* buf = nullptr, [[maybe_unused]] const std::string& mode = "wt", [[maybe_unused]] const std::string& storage_options = "") const

std::string

pd_series.h:8460

View

void to_markdown(const std::string& path, bool index = true) const

void

pd_series.h:8502

View

file << to_markdown(index)

pd_series.h:8507

View

numpy::NDArray<U> to_numpy() const

numpy::NDArray<U>

pd_series.h:7956

View

numpy::NDArray<U> to_numpy(bool copy) const

numpy::NDArray<U>

pd_series.h:7984

View

numpy::NDArray<U> to_numpy(bool copy, U na_value) const

numpy::NDArray<U>

pd_series.h:8003

View

std::vector<uint8_t> to_orc( const std::string& path = "", const std::string& engine = "pyarrow", std::optional<bool> index = std::nullopt) const

std::vector<uint8_t>

pd_series.h:8842

View

std::vector<uint8_t> to_parquet( const std::string& path = "", const std::string& engine = "pyarrow", const std::string& compression = "snappy", std::optional<bool> index = std::nullopt) const

std::vector<uint8_t>

pd_series.h:8757

View

void to_pickle( const std::string& path, [[maybe_unused]] const std::string& compression = "infer", [[maybe_unused]] int protocol = 5, [[maybe_unused]] const std::string& storage_options = "") const

void

pd_series.h:8530

View

std::optional<int64_t> to_sql( const std::string& name, const std::string& con, const std::string& schema = "", const std::string& if_exists = "fail", bool index = true, const std::string& index_label = "", std::optional<size_t> chunksize = std::nullopt, const std::map<std::string, std::string>& dtype = {}, const std::string& method = "") const

std::optional<int64_t>

pd_series.h:8683

View

std::vector<uint8_t> to_stata( const std::string& path = "", const std::map<std::string, std::string>& convert_dates = {}, bool write_index = true, const std::string& byteorder = "", const std::string& time_stamp = "", const std::string& data_label = "", const std::map<std::string, std::string>& variable_labels = {}, int version = 114) const

std::vector<uint8_t>

pd_series.h:8786

View

std::string to_string() const override

std::string

pd_series.h:10553

View

std::string to_string_full( [[maybe_unused]] std::ostream* buf, [[maybe_unused]] const std::string& na_rep, [[maybe_unused]] const std::string& float_format, [[maybe_unused]] bool header, bool index, bool length, bool dtype, bool name, std::optional<size_t> max_rows, [[maybe_unused]] std::optional<size_t> min_rows) const

std::string

pd_series.h:10989

View

std::string to_string_multiindex() const

std::string

pd_series.h:10878

std::vector<std::string> to_string_vector() const override

std::vector<std::string>

pd_series.h:10371

View

DataArray<T> to_xarray() const

DataArray<T>

pd_series.h:8306

View

std::vector<T> tolist() const

std::vector<T>

pd_series.h:15732

View

Conversion#

Signature

Return Type

Location

Example

Series<U> astype() const

Series<U>

pd_series.h:6890

View

std::unique_ptr<NDFrameBase> astype( const std::string& dtype_str, const std::vector<std::string>& cats = {}, bool ordered = false) const

std::unique_ptr<NDFrameBase>

pd_series.h:7332

View

std::unique_ptr<NDFrameBase> astype_boolean() const

std::unique_ptr<NDFrameBase>

pd_series.h:7091

std::unique_ptr<NDFrameBase> astype_category(const std::vector<std::string>& cats, bool ordered) const

std::unique_ptr<NDFrameBase>

pd_series.h:7012

std::unique_ptr<NDFrameBase> astype_dtype(const std::string& dtype_str) const override

std::unique_ptr<NDFrameBase>

pd_series.h:11182

View

std::unique_ptr<NDFrameBase> astype_nullable_int(const std::string& dtype_str) const

std::unique_ptr<NDFrameBase>

pd_series.h:6963

std::unique_ptr<NDFrameBase> astype_numeric() const

std::unique_ptr<NDFrameBase>

pd_series.h:7271

Series<numpy::object_> astype_object() const

Series<numpy::object_>

pd_series.h:7624

std::unique_ptr<NDFrameBase> astype_period() const

std::unique_ptr<NDFrameBase>

pd_series.h:7262

std::unique_ptr<NDFrameBase> astype_string(const std::string& target_dtype) const

std::unique_ptr<NDFrameBase>

pd_series.h:7146

bool bool_() const

bool

pd_series.h:12024

View

static numpy::NDArray<numpy::bool_> bool_vec_to_cond(const std::vector<bool>& cond)

static numpy::NDArray<numpy::bool_>

pd_series.h:2442

View

std::unique_ptr<NDFrameBase> convert_dtypes( bool infer_objects = true, bool convert_string = true, bool convert_integer = true, bool convert_boolean = true, bool convert_floating = true, const std::string& dtype_backend = "numpy_nullable") const

std::unique_ptr<NDFrameBase>

pd_series.h:7659

View

Series<T> copy(bool deep = true) const

Series<T>

pd_series.h:6870

View

void copy_series_metadata_from(const Series<T>& source)

void

pd_series.h:16261

View

void copy_value_from(size_t src_idx, size_t dst_idx) override

void

pd_series.h:3086

std::unique_ptr<NDFrameBase> infer_objects(bool copy = true) const

std::unique_ptr<NDFrameBase>

pd_series.h:7787

View

Series<T> view() const

Series<T>

pd_series.h:15773

View

Iteration#

Signature

Return Type

Location

Example

std::vector<std::pair<std::string, T>> items() const

std::vector<std::pair<std::string, T>>

pd_series.h:13549

View

void items(Func&& func) const

void

pd_series.h:13563

View

std::vector<std::string> keys() const

std::vector<std::string>

pd_series.h:13573

View

Set Operations#

Signature

Return Type

Location

Example

numpy::NDArray<numpy::bool_> duplicated(const std::string& keep = "first") const

numpy::NDArray<numpy::bool_>

pd_series.h:5670

View

Series<numpy::bool_> isin(const std::vector<T>& values) const

Series<numpy::bool_>

pd_series.h:9047

View

Series<T> unique() const

Series<T>

pd_series.h:5627

View

Type Checking#

Signature

Return Type

Location

Example

static bool is_bool_string(const std::string& value)

static bool

pd_series.h:621

static bool is_float_actually_integer(double value)

static bool

pd_series.h:666

static bool is_float_string(const std::string& value)

static bool

pd_series.h:647

bool is_int_dtype() const

bool

pd_series.h:8023

static bool is_integer_string(const std::string& value)

static bool

pd_series.h:629

bool is_na_at(size_t idx) const override

bool

pd_series.h:2807

View

bool is_nullable_int_dtype() const

bool

pd_series.h:8043

Other Methods#

Signature

Return Type

Location

Example

std::vector<std::vector<double>> B_mat(static_cast<int>(nvalid), std::vector<double>(n_basis, 0.0))

std::vector<std::vector<double>>

pd_series.h:13318

std::vector<std::vector<double>> B_mat2(static_cast<int>(nvalid), std::vector<double>(n_basis, 0.0))

std::vector<std::vector<double>>

pd_series.h:13359

std::vector<std::vector<double>> B_mat2(static_cast<int>(nvalid), std::vector<double>(n_basis, 0.0))

std::vector<std::vector<double>>

pd_series.h:13376

GlobalUnlock(hMem)

pd_series.h:8606

View

SetClipboardData(CF_TEXT, hMem)

pd_series.h:8609

std::vector<std::vector<double>> VtV(pn, std::vector<double>(pn, 0.0))

std::vector<std::vector<double>>

pd_series.h:13336

Series<T> abs() const

Series<T>

pd_series.h:4896

View

bool all_values_whole_number() const override

bool

pd_series.h:9066

View

bool any_missing_cells() const

bool

pd_series.h:16332

size_t argmax() const

size_t

pd_series.h:3875

View

size_t argmin() const

size_t

pd_series.h:3844

View

std::optional<double> autocorr(int lag = 1) const

std::optional<double>

pd_series.h:11780

View

std::vector<const IndexBase*> axes() const override

std::vector<const IndexBase*>

pd_series.h:1532

View

Series<numpy::bool_> between(const T& left, const T& right, const std::string& inclusive = "both") const

Series<numpy::bool_>

pd_series.h:4863

View

size_t cache_memory_usage() const override

size_t

pd_series.h:11620

static std::string canonical_dtype_name()

static std::string

pd_series.h:1178

View

Series<T> case_when( const std::vector<std::pair<numpy::NDArray<numpy::bool_>, T>>& caselist, const std::optional<T>& default_value = std::nullopt) const

Series<T>

pd_series.h:12047

View

CategoricalAccessor<Series<T>> cat() const

CategoricalAccessor<Series<T>>

pd_series.h:10522

View

bool cat_ordered() const override

bool

pd_series.h:1262

View

Series<numpy::float64> ceil() const

Series<numpy::float64>

pd_series.h:4724

View

clamp_bool_values(result_data, promo)

pd_series.h:3961

clamp_bool_values(result_data, promo)

pd_series.h:4043

static void clamp_bool_values(std::vector<V>& data, const std::optional<std::string>& promo)

void

pd_series.h:4192

clamp_bool_values(result_data, promo)

pd_series.h:4248

clamp_bool_values(result_data, promo)

pd_series.h:16123

clamp_bool_values(result_data, promo)

pd_series.h:16166

void clear_cache() const override

void

pd_series.h:11605

View

void clear_dtype_override() override

void

pd_series.h:1234

Series<T> clip(const T& lower, const T& upper, int axis = 0, bool inplace = false) const

Series<T>

pd_series.h:4923

View

std::unique_ptr<NDFrameBase> clone() const override

std::unique_ptr<NDFrameBase>

pd_series.h:11170

View

std::optional<double> corr( const Series<T>& other, const std::string& method = "pearson", std::optional<size_t> min_periods = std::nullopt) const

std::optional<double>

pd_series.h:12209

View

std::optional<double> cov( const Series<T>& other, std::optional<size_t> min_periods = std::nullopt, int ddof = 1) const

std::optional<double>

pd_series.h:12283

View

const std::optional<std::shared_ptr<pandas::DatetimeArray>>& datetime_array() const

const std::optional<std::shared_ptr<pandas::DatetimeArray>>&

pd_series.h:1277

View

static std::string datetime_ns_to_string(double v)

static std::string

pd_series.h:6922

View

numpy::datetime64 dt(ns, numpy::DateTimeUnit::Nanosecond)

numpy::datetime64

pd_series.h:4312

View

DatetimeProperties<Series<T>> dt() const

DatetimeProperties<Series<T>>

pd_series.h:10497

View

std::string dtype_name() const override

std::string

pd_series.h:1133

View

std::string dtype_name_full() const override

std::string

pd_series.h:1191

View

const std::optional<std::string>& dtype_override() const

const std::optional<std::string>&

pd_series.h:1167

View

emit_val(i)

pd_series.h:8181

emit_val(i)

pd_series.h:8200

emit_val(i)

pd_series.h:8209

bool empty() const override

bool

pd_series.h:1097

View

std::pair<numpy::NDArray<numpy::int64>, Series<T>> factorize(bool sort = false, bool use_na_sentinel = true) const

std::pair<numpy::NDArray<numpy::int64>, Series<T>>

pd_series.h:12669

View

std::ofstream file(path)

std::ofstream

pd_series.h:8442

View

std::ofstream file(path)

std::ofstream

pd_series.h:8503

View

std::ofstream file(path, std::ios::binary)

std::ofstream

pd_series.h:8535

View

Series<T> filter( const std::optional<std::vector<std::string>>& items = std::nullopt, const std::optional<std::string>& like = std::nullopt, const std::optional<std::string>& regex = std::nullopt, int axis = 0) const

Series<T>

pd_series.h:12823

View

Series<T> filter_by_bool_mask(const pandas::BooleanMask& mask) const

Series<T>

pd_series.h:1706

View

Series<T> filter_by_bool_series(const Series<numpy::bool_>& cond) const

Series<T>

pd_series.h:1726

View

Series<numpy::float64> floor() const

Series<numpy::float64>

pd_series.h:4710

View

oss << fmt_cat(0) << sep << fmt_cat(1) << line_sep << indent

pd_series.h:10855

oss << fmt_cat(2) << sep << fmt_cat(3) << sep << "..." << line_sep << indent

pd_series.h:10857

oss << fmt_cat(last_start) << sep << fmt_cat(last_start + 1) << sep

pd_series.h:10860

oss << fmt_cat(last_start + 2) << line_sep << indent

pd_series.h:10861

oss << fmt_cat(last_start + 3)

pd_series.h:10862

oss << fmt_cat(i)

pd_series.h:10866

std::string format_display_name() const

std::string

pd_series.h:1353

func(index_->get_value_str(i), data_.getElementAt({i}))

pd_series.h:13565

View

bool has_cached_values() const override

bool

pd_series.h:11609

View

bool has_cat_categories() const override

bool

pd_series.h:1241

View

bool has_mask() const override

bool

pd_series.h:1291

View

bool has_multiindex() const

bool

pd_series.h:1514

View

bool has_nan_values() const

bool

pd_series.h:6905

bool hasnans() const override

bool

pd_series.h:2524

View

std::string indent(header.size(), ' ')

std::string

pd_series.h:10851

const IndexBase& index() const override

const IndexBase&

pd_series.h:1389

View

IndexBase& index_mut()

IndexBase&

pd_series.h:1396

View

static std::string infer_best_dtype( const std::vector<std::string>& str_values, const std::string& source_dtype, bool convert_string, bool convert_integer, bool convert_boolean, bool convert_floating)

static std::string

pd_series.h:681

void info(std::ostream* buf = nullptr, bool verbose = true, int max_cols = 0, bool memory_usage = true, bool show_counts = true) const

void

pd_series.h:12918

View

void invalidate_caches() const

void

pd_series.h:16024

T item() const

T

pd_series.h:13535

View

std::unique_ptr<IndexBase> make_default_index(size_t sz) const

std::unique_ptr<IndexBase>

pd_series.h:16047

memcpy(pMem, text.c_str(), text.size() + 1)

pd_series.h:8605

View

size_t memory_usage(bool index = true, bool deep = false) const

size_t

pd_series.h:13608

View

const std::optional<std::shared_ptr<pandas::MixedTzDatetimeArray>>& mixed_tz_array() const

const std::optional<std::shared_ptr<pandas::MixedTzDatetimeArray>>&

pd_series.h:1282

View

std::optional<std::string> name() const override

std::optional<std::string>

pd_series.h:1308

View

const std::string& name_interval_closed() const

const std::string&

pd_series.h:1344

double name_interval_left() const

double

pd_series.h:1342

double name_interval_right() const

double

pd_series.h:1343

bool name_is_int() const

bool

pd_series.h:1328

bool name_is_interval() const

bool

pd_series.h:1335

bool name_is_timestamp() const

bool

pd_series.h:1366

bool name_is_tuple() const

bool

pd_series.h:1346

int64_t name_timestamp_ns() const

int64_t

pd_series.h:1372

const std::string& name_timestamp_tz() const

const std::string&

pd_series.h:1373

const std::vector<std::string>& name_tuple_elements() const

const std::vector<std::string>&

pd_series.h:1350

size_t nbytes() const override

size_t

pd_series.h:1118

View

size_t ndim() const override

size_t

pd_series.h:1111

View

static std::optional<std::string> promote_arith_dtype( const std::optional<std::string>& a, const std::optional<std::string>& b)

static std::optional<std::string>

pd_series.h:4118

static std::optional<std::string> promote_div_dtype( const std::optional<std::string>& a, const std::optional<std::string>& b)

static std::optional<std::string>

pd_series.h:4165

static std::optional<std::string> promote_div_scalar_dtype( const std::optional<std::string>& series_dt, T2 /*scalar*/)

std::optional<std::string>

pd_series.h:4183

static std::optional<std::string> promote_scalar_dtype( const std::optional<std::string>& series_dt, T2 scalar)

std::optional<std::string>

pd_series.h:4140

propagate_index_to(result, len, other)

pd_series.h:3964

propagate_index_to(result, len, other)

pd_series.h:3984

propagate_index_to(result, len, other)

pd_series.h:4012

propagate_index_to(result, len, other)

pd_series.h:4032

propagate_index_to(result, len, other)

pd_series.h:4251

propagate_index_to(result, len, other)

pd_series.h:4413

propagate_index_to(result, len, other)

pd_series.h:4465

propagate_index_to(res, len, other)

pd_series.h:4587

propagate_index_to(res, len, other)

pd_series.h:4632

propagate_index_to(res, len, other)

pd_series.h:4672

void propagate_index_to(Series<T>& result, size_t result_len) const

void

pd_series.h:16056

void propagate_index_to(Series<T>& result, size_t result_len, const Series<T>& other) const

void

pd_series.h:16070

numpy::NDArray<T> ravel() const

numpy::NDArray<T>

pd_series.h:13974

View

Series<T> repeat(size_t repeats, int axis = 0) const

Series<T>

pd_series.h:15432

View

std::string repr() const override

std::string

pd_series.h:11133

View

size_t resolve_multiindex_level(const std::string& name) const

size_t

pd_series.h:6705

View

size_t resolve_multiindex_level(int level) const

size_t

pd_series.h:6716

View

size_t resolve_slice_bound( const std::variant<std::monostate, std::int64_t, std::string, pandas::Timestamp, pandas::Timedelta>& v, bool is_start, bool /*inclusive*/) const

size_t

pd_series.h:1832

resolve_slice_bound(spec.start, true, true)

pd_series.h:1942

resolve_slice_bound(spec.stop, true, true)

pd_series.h:1951

std::string result(buf)

std::string

pd_series.h:9454

View

std::string result(buf)

std::string

pd_series.h:9599

View

Series<numpy::float64> round(int decimals = 0) const

Series<numpy::float64>

pd_series.h:4695

View

std::string s(buf)

std::string

pd_series.h:9117

View

std::string s(buf)

std::string

pd_series.h:9178

View

std::string s(buf)

std::string

pd_series.h:9233

View

std::string s(buf)

std::string

pd_series.h:9738

View

std::string s(buf)

std::string

pd_series.h:9778

View

std::string s(buf)

std::string

pd_series.h:14891

View

std::string s(buf)

std::string

pd_series.h:15230

View

std::string s(buf)

std::string

pd_series.h:15281

View

void set_cat_categories(const std::vector<std::string>& cats) override

void

pd_series.h:1255

View

void set_cat_categories_dtype(const std::string& dtype) override

void

pd_series.h:1270

void set_cat_ordered(bool ordered) override

void

pd_series.h:1267

View

void set_datetime_array(std::shared_ptr<pandas::DatetimeArray> arr) { datetime_array_ = std::move(arr); }

void

pd_series.h:1278

View

void set_dtype_override(const std::string& dtype_str) override

void

pd_series.h:1211

View

void set_freq(const std::optional<std::string>& freq)

void

pd_series.h:10537

View

void set_mask(const numpy::NDArray<numpy::bool_>& mask)

void

pd_series.h:1285

View

void set_mixed_tz_array(std::shared_ptr<pandas::MixedTzDatetimeArray> arr) { mixed_tz_array_ = std::move(arr); }

void

pd_series.h:1283

View

void set_multiindex(MultiIndex multi_idx)

void

pd_series.h:1501

View

void set_name(const std::optional<std::string>& name) override

void

pd_series.h:1315

View

void set_name_interval_data(double left, double right, const std::string& closed)

void

pd_series.h:1337

void set_name_is_int(bool flag)

void

pd_series.h:1333

void set_name_is_interval(bool flag)

void

pd_series.h:1336

void set_name_is_timestamp(bool flag)

void

pd_series.h:1367

void set_name_is_tuple(bool flag)

void

pd_series.h:1347

void set_name_timestamp_data(int64_t ns, const std::string& tz = "")

void

pd_series.h:1368

void set_name_tuple_elements(const std::vector<std::string>& elems)

void

pd_series.h:1348

void set_name_tuple_elements(std::vector<std::string>&& elems) { name_tuple_elements_ = std::move(elems); }

void

pd_series.h:1349

void set_sparse_fill_value(double fv) override

void

pd_series.h:10534

std::vector<size_t> shape() const override

std::vector<size_t>

pd_series.h:1104

View

size_t size() const override

size_t

pd_series.h:1090

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4046

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4058

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4078

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4092

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4109

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4282

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4318

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:4364

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:4604

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:4649

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:4684

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:4703

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:4912

View

propagate_index_to(q, size())

propagate_index_to(q,

pd_series.h:12371

View

propagate_index_to(r, size())

propagate_index_to(r,

pd_series.h:12372

View

propagate_index_to(q, size(), other)

propagate_index_to(q,

pd_series.h:12400

View

propagate_index_to(r, size(), other)

propagate_index_to(r,

pd_series.h:12401

View

propagate_index_to(result, size())

propagate_index_to(result,

pd_series.h:13800

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:13849

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:13889

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:13915

View

propagate_index_to(res, size())

propagate_index_to(res,

pd_series.h:13936

View

propagate_index_to(q, size())

propagate_index_to(q,

pd_series.h:13966

View

propagate_index_to(r, size())

propagate_index_to(r,

pd_series.h:13967

View

SparseAccessor<Series<T>> sparse(double fill_value = 0.0) const

SparseAccessor<Series<T>>

pd_series.h:10546

View

StringMethods<Series<T>> str() const

StringMethods<Series<T>>

pd_series.h:10442

View

trim_zeros_column(val_strs)

pd_series.h:10681

trim_zeros_column(val_strs)

pd_series.h:11057

Series<T> truncate(const std::optional<std::string>& before = std::nullopt, const std::optional<std::string>& after = std::nullopt, int axis = 0, bool copy = true) const

Series<T>

pd_series.h:6830

View

void validate_index_data_alignment() const

void

pd_series.h:16039

std::unordered_set<T, std::hash<T>> value_set(values.begin(), values.end())

std::unordered_set<T, std::hash<T>>

pd_series.h:9048

const numpy::NDArray<T>& values() const

const numpy::NDArray<T>&

pd_series.h:1382

View

Code Examples#

The following examples are extracted from the test suite.

Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
Series (pd_test_1_all.cpp:11161)
11151            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11152
11153            // Pipe applies function to entire Series
11154            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11155                auto mean_val = ser.mean();
11156                std::vector<double> result;
11157                for (size_t i = 0; i < ser.size(); ++i) {
11158                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11159                }
11160                return pandas::Series<double>(result, ser.name());
11161            };
11162
11163            auto result = s.pipe(add_mean, 10.0);
11164
11165            bool passed = true;
11166            // mean is 2.5, offset is 10.0, so each value + 12.5
11167            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11168            for (size_t i = 0; i < result.size(); ++i) {
11169                if (!approx_equal(result[i], expected[i])) {
11170                    passed = false;
at (pd_test_1_all.cpp:6581)
6571            // Test isna/notna with float data
6572            {
6573                std::map<std::string, std::vector<numpy::float64>> float_data;
6574                float_data["X"] = {1.0, std::nan(""), 3.0};
6575                float_data["Y"] = {4.0, 5.0, std::nan("")};
6576                pandas::DataFrame df_na(float_data);
6577
6578                auto na_mask = df_na.isna();
6579                // Row 1, col 0 (X) should be NA
6580                if (!na_mask.getElementAt({1, 0})) {
6581                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (1,0) should be true" << std::endl;
6582                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (1,0)");
6583                }
6584                // Row 2, col 1 (Y) should be NA
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
at_time (pd_test_2_all.cpp:728)
718        std::cout << "====================================== [OK] pd_test_asof test suite ========================== " << std::endl;
719        return 0;
720    }
721
722} // namespace dataframe_tests
723// ------------------- pd_test_asof.cpp (end) -----------------------------
724
725// ------------------- pd_test_at_time.cpp (start) -----------------------------
726// dataframe_tests/pd_test_at_time.cpp
727// Tests for DataFrame.at_time() method (pandas 2.0+ API)
728// Selects values at particular time of day from datetime-indexed DataFrame
729#include <iostream>
730#include <stdexcept>
731#include <vector>
732#include <string>
733#include <map>
734#include "../pandas/pd_dataframe.h"
735
736// CRITICAL: No using namespace directives
first (pd_test_1_all.cpp:11616)
11606        void pd_test_groupby_first_last() {
11607            std::cout << "========= GroupBy first/last ====================";
11608
11609            std::map<std::string, std::vector<double>> data = {
11610                {"category", {1.0, 1.0, 2.0, 2.0}},
11611                {"value", {10.0, 20.0, 30.0, 40.0}}
11612            };
11613            pandas::DataFrame df(data);
11614
11615            auto first_result = df.groupby("category").first();
11616            auto last_result = df.groupby("category").last();
11617
11618            // First for group 1: 10, group 2: 30
11619            // Last for group 1: 20, group 2: 40
11620            double first1 = std::stod(first_result["value"].get_value_str(0));
11621            double first2 = std::stod(first_result["value"].get_value_str(1));
11622
11623            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11624                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11625            if (!passed) {
first_valid_index (pd_test_1_all.cpp:20555)
20545            std::vector<double> values = {
20546                std::numeric_limits<double>::quiet_NaN(),
20547                std::numeric_limits<double>::quiet_NaN(),
20548                3.0,
20549                4.0,
20550                5.0
20551            };
20552            pandas::Series<double> s(values, "test");
20553
20554            auto first_idx = s.first_valid_index();
20555
20556            bool passed = first_idx.has_value() && first_idx.value() == 2;
20557
20558            if (!passed) {
20559                std::cout << "  [FAIL] : in pd_test_timeseries_first_valid_index() : expected index 2" << std::endl;
20560                throw std::runtime_error("pd_test_timeseries_first_valid_index failed");
20561            }
20562
20563            std::cout << " -> tests passed" << std::endl;
20564        }
get (pd_test_1_all.cpp:10290)
10280void pd_test_extension_index_get_loc_unique() {
10281    std::cout << "========= get_loc (unique) =========================";
10282
10283    pandas::CategoricalArray arr({"apple", "banana", "cherry"});
10284    pandas::CategoricalIndex idx(arr);
10285
10286    auto loc_apple = idx.get_loc("apple");
10287    auto loc_banana = idx.get_loc("banana");
10288    auto loc_cherry = idx.get_loc("cherry");
10289
10290    bool passed = (std::holds_alternative<size_t>(loc_apple) && std::get<size_t>(loc_apple) == 0 &&
10291                   std::get<size_t>(loc_banana) == 1 &&
10292                   std::get<size_t>(loc_cherry) == 2);
10293    if (!passed) {
10294        std::cout << "  [FAIL] : in pd_test_extension_index_get_loc_unique() : get_loc check failed" << std::endl;
10295        throw std::runtime_error("pd_test_extension_index_get_loc_unique failed");
10296    }
10297
10298    std::cout << " -> tests passed" << std::endl;
10299}
get_by_label_duplicates (pd_test_3_all.cpp:28133)
28123    if (fail) throw std::runtime_error("pd_test_getitem_timedelta_str_not_found failed");
28124}
28125
28126void pd_test_getitem_duplicate_labels() {
28127    std::cout << "  -- pd_test_getitem_duplicate_labels --" << std::endl;
28128    int fail = 0;
28129    std::vector<numpy::float64> vals{1.0, 2.0, 3.0, 4.0};
28130    pandas::Series<numpy::float64> s(vals, "x");
28131    pandas::Index<std::string> idx(std::vector<std::string>{"a", "x", "x", "x"});
28132    s.set_index(idx);
28133    auto lookup = s.get_by_label_duplicates("x");
28134    if (lookup.count != 3) { std::cout << "    FAIL: expected count=3, got " << lookup.count << std::endl; fail++; }
28135    if (!lookup.sub_series.has_value()) { std::cout << "    FAIL: sub_series missing" << std::endl; fail++; }
28136    else if (lookup.sub_series->size() != 3) { std::cout << "    FAIL: sub size " << lookup.sub_series->size() << std::endl; fail++; }
28137    auto lookup_one = s.get_by_label_duplicates("a");
28138    if (lookup_one.count != 1) { std::cout << "    FAIL: expected count=1" << std::endl; fail++; }
28139    if (lookup_one.single_index != 0) { std::cout << "    FAIL: expected single_index=0" << std::endl; fail++; }
28140    auto lookup_zero = s.get_by_label_duplicates("z");
28141    if (lookup_zero.count != 0) { std::cout << "    FAIL: expected count=0" << std::endl; fail++; }
28142    if (fail == 0) std::cout << "    OK" << std::endl;
28143    if (fail) throw std::runtime_error("pd_test_getitem_duplicate_labels failed");
get_cat_categories (pd_test_2_all.cpp:20374)
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383    // Test DatetimeIndex freq propagation
20384    pandas::DataFrame df;
get_freq (pd_test_2_all.cpp:20397)
20387    std::vector<numpy::datetime64> ts = {
20388        numpy::datetime64(0LL, numpy::DateTimeUnit::Day),
20389        numpy::datetime64(1LL, numpy::DateTimeUnit::Day),
20390        numpy::datetime64(2LL, numpy::DateTimeUnit::Day)
20391    };
20392    auto dt_idx = std::make_unique<pandas::DatetimeIndex>(ts);
20393    dt_idx->set_freq(std::string("D"));
20394    df.set_index(std::move(dt_idx));
20395
20396    auto s = df.extract_column_as_numeric_series("val");
20397    check(s.get_freq().has_value(), "freq propagated");
20398    if (s.get_freq().has_value()) {
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
get_optional (pd_test_1_all.cpp:6741)
6731                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex cols");
6732                }
6733            }
6734
6735            // Test get_optional
6736            {
6737                std::map<std::string, std::vector<int>> data;
6738                data["A"] = {1, 2, 3};
6739                pandas::DataFrame df(data);
6740
6741                auto col_opt = df.get_optional("A");
6742                if (!col_opt.has_value()) {
6743                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : get_optional A should exist" << std::endl;
6744                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional A");
6745                }
6746
6747                auto missing = df.get_optional("Z");
6748                if (missing.has_value()) {
6749                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : get_optional Z should not exist" << std::endl;
6750                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751                }
get_slice_as_series (pd_test_3_all.cpp:28169)
28159    }
28160    if (fail == 0) std::cout << "    OK" << std::endl;
28161    if (fail) throw std::runtime_error("pd_test_getitem_duplicate_preserves_dtype failed");
28162}
28163
28164void pd_test_getitem_period_sub_series() {
28165    std::cout << "  -- pd_test_getitem_period_sub_series --" << std::endl;
28166    int fail = 0;
28167    std::vector<numpy::float64> vals{10.0, 20.0, 30.0, 40.0, 50.0};
28168    pandas::Series<numpy::float64> s(vals, "v");
28169    auto sub = s.get_slice_as_series(1, 4);
28170    if (sub.size() != 3) { std::cout << "    FAIL: size " << sub.size() << std::endl; fail++; }
28171    if (sub[0] != 20.0) { std::cout << "    FAIL: [0] " << sub[0] << std::endl; fail++; }
28172    if (sub[2] != 40.0) { std::cout << "    FAIL: [2] " << sub[2] << std::endl; fail++; }
28173    if (fail == 0) std::cout << "    OK" << std::endl;
28174    if (fail) throw std::runtime_error("pd_test_getitem_period_sub_series failed");
28175}
28176
28177int pd_test_getitem_edge_main_impl() {
28178    std::cout << "====================================== pd_test_getitem_edge ==============================" << std::endl;
28179    int fail = 0;
get_string (pd_test_3_all.cpp:27746)
27736        }
27737    }
27738
27739    pandas::Series<numpy::int64> si({10, 20, 30}, "ints");
27740    auto result2 = si.astype("str");
27741    auto* str_s2 = dynamic_cast<pandas::Series<std::string>*>(result2.get());
27742    if (!str_s2) {
27743        std::cout << "    FAIL: expected Series<string> from int" << std::endl;
27744        fail++;
27745    } else {
27746        if (str_s2->get_string(0) != "10") {
27747            std::cout << "    FAIL: expected '10', got '" << str_s2->get_string(0) << "'" << std::endl;
27748            fail++;
27749        }
27750    }
27751
27752    if (fail == 0) std::cout << "    OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756    std::cout << "  -- pd_test_astype_datetime_to_string --" << std::endl;
get_value_bool (pd_test_5_all.cpp:35197)
35187    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35188    pandas_tests::check(df["X"].get_value_double(0) == 1.0, "case_2.idx0_one", local_fail);
35189    pandas_tests::check(std::isnan(df["X"].get_value_double(1)),
35190                        "case_2.idx1_nan", local_fail);
35191    pandas_tests::check(df["X"].get_value_double(2) == 0.0, "case_2.idx2_zero", local_fail);
35192}
35193
35194void bool_nullable_826495_case_3_get_value_bool_mask_aware(int& local_fail) {
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
get_value_double (pd_test_2_all.cpp:19160)
19150    std::map<std::string, std::string> col_funcs;
19151    col_funcs["a"] = "sum";
19152    col_funcs["b"] = "mean";
19153
19154    pandas::Series<numpy::float64> result = df.agg_to_series(col_funcs);
19155
19156    // a.sum() = 10.0, b.mean() = 25.0
19157    check(result.size() == 2, "result_size_2");
19158
19159    // std::map iterates in alphabetical order: a, b
19160    check(std::abs(result.get_value_double(0) - 10.0) < 1e-9, "a_sum_10");
19161    check(std::abs(result.get_value_double(1) - 25.0) < 1e-9, "b_mean_25");
19162
19163    // Check index labels
19164    check(result.index().get_value_str(0) == "a", "index_0_a");
19165    check(result.index().get_value_str(1) == "b", "index_1_b");
19166}
19167
19168void pd_test_agg_dispatch_dict_simple_single_col() {
19169    std::cout << "  -- pd_test_agg_dispatch_dict_simple_single_col --" << std::endl;
get_value_str (pd_test_1_all.cpp:4665)
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670            }
4671
4672            // A-B correlation should be 1.0 (perfect correlation)
4673            std::string ab = corr_df["B"].get_value_str(0);
4674            passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675            if (!passed) {
get_value_str (pd_test_1_all.cpp:4665)
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
4666            passed = std::abs(std::stod(aa) - 1.0) < 0.001;
4667            if (!passed) {
4668                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : diagonal should be 1.0" << std::endl;
4669                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: diagonal should be 1.0");
4670            }
4671
4672            // A-B correlation should be 1.0 (perfect correlation)
4673            std::string ab = corr_df["B"].get_value_str(0);
4674            passed = std::abs(std::stod(ab) - 1.0) < 0.001;
4675            if (!passed) {
head (pd_test_1_all.cpp:6301)
6291        void pd_test_dataframe_indexing() {
6292            std::cout << "========= indexing (loc/iloc) ==============";
6293
6294            std::map<std::string, std::vector<numpy::float64>> data;
6295            data["A"] = {10.0, 20.0, 30.0, 40.0, 50.0};
6296            data["B"] = {1.0, 2.0, 3.0, 4.0, 5.0};
6297
6298            pandas::DataFrame df(data);
6299
6300            // Test head
6301            auto head_df = df.head(3);
6302            if (head_df.nrows() != 3) {
6303                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304                throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
iat (pd_test_1_all.cpp:22028)
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
22029            std::string a1 = result.iat<double>(1, col_a_idx) == -1.0 ? "ok" : "fail";
22030            std::string a2 = result.iat<double>(2, col_a_idx) == 3.0 ? "ok" : "fail";
22031            std::string a3 = result.iat<double>(3, col_a_idx) == 4.0 ? "ok" : "fail";
22032
22033            if (a0 != "ok" || a1 != "ok" || a2 != "ok" || a3 != "ok") {
22034                passed = false;
22035                error_msg = "Column A values incorrect: A[0]=" + a0 + ", A[1]=" + a1 +
22036                            ", A[2]=" + a2 + ", A[3]=" + a3;
22037            }
iat_resolved (pd_test_5_all.cpp:92945)
92935    pandas_tests::check(result.iat(0) == 1.0,
92936        "f_test_25_loc_list_positions_happy_553011.iat0_eq_1", local_fail);
92937    pandas_tests::check(result.iat(2) == 5.0,
92938        "f_test_25_loc_list_positions_happy_553011.iat2_eq_5", local_fail);
92939}
92940
92941void case_23_iat_resolved_positive() {
92942    std::cout << "-- case_23_iat_resolved_positive()\n";
92943    int local_fail = 0;
92944    auto s = make_f64_series_5();
92945    pandas_tests::check(s.iat_resolved(0) == 1.0,
92946        "f_test_25_iat_resolved_positive_446201.idx0_eq_1", local_fail);
92947    pandas_tests::check(s.iat_resolved(2) == 3.0,
92948        "f_test_25_iat_resolved_positive_446201.idx2_eq_3", local_fail);
92949    pandas_tests::check(s.iat_resolved(4) == 5.0,
92950        "f_test_25_iat_resolved_positive_446201.idx4_eq_5", local_fail);
92951}
92952
92953void case_24_iat_resolved_negative_one() {
92954    std::cout << "-- case_24_iat_resolved_negative_one()\n";
92955    int local_fail = 0;
idxmax (pd_test_1_all.cpp:23956)
23946        std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23947        return 0;
23948    }
23949
23950} // namespace dataframe_tests
23951// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23952
23953// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23954// dataframe_tests/pd_test_idxmax_idxmin.cpp
23955// Test for DataFrame.idxmax() and idxmin() methods
23956
23957#include <iostream>
23958#include <stdexcept>
23959#include <cmath>
23960#include <limits>
23961#include "../pandas/pd_dataframe.h"
23962
23963// CRITICAL: No using namespace directives
23964
23965namespace dataframe_tests {
idxmax_typed (pd_test_3_all.cpp:26151)
26141// ------------------- pd_test_where_series (end) ---------------------
26142
26143// ------------------- pd_test_idxmax_min_typed (begin) ---------------
26144#include "../pandas/pd_datetime_index.h"
26145
26146namespace dataframe_tests_idxmax_min_typed {
26147
26148void pd_test_idxmax_min_typed_regular_max() {
26149    std::cout << "  pd_test_idxmax_min_typed_regular_max: ";
26150    ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0, 5.0, 4.0});
26151    auto [is_dt, data] = s.idxmax_typed();
26152    if (is_dt) throw std::runtime_error("Expected non-datetime result");
26153    if (data.second != "3") throw std::runtime_error("Expected label '3', got '" + data.second + "'");
26154    std::cout << "PASSED" << std::endl;
26155}
26156
26157void pd_test_idxmax_min_typed_regular_min() {
26158    std::cout << "  pd_test_idxmax_min_typed_regular_min: ";
26159    ::pandas::Series<::numpy::float64> s({3.0, 1.0, 2.0, 5.0, 4.0});
26160    auto [is_dt, data] = s.idxmin_typed();
26161    if (is_dt) throw std::runtime_error("Expected non-datetime result");
idxmin (pd_test_1_all.cpp:23956)
23947        std::cout << "====================================== [OK] pd_test_ffill_bfill test suite ========================== " << std::endl;
23948        return 0;
23949    }
23950
23951} // namespace dataframe_tests
23952// ------------------- pd_test_ffill_bfill.cpp (end) -----------------------------
23953
23954// ------------------- pd_test_idxmax_idxmin.cpp (start) -----------------------------
23955// dataframe_tests/pd_test_idxmax_idxmin.cpp
23956// Test for DataFrame.idxmax() and idxmin() methods
23957
23958#include <iostream>
23959#include <stdexcept>
23960#include <cmath>
23961#include <limits>
23962#include "../pandas/pd_dataframe.h"
23963
23964// CRITICAL: No using namespace directives
23965
23966namespace dataframe_tests {
idxmin_typed (pd_test_3_all.cpp:26160)
26150    ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0, 5.0, 4.0});
26151    auto [is_dt, data] = s.idxmax_typed();
26152    if (is_dt) throw std::runtime_error("Expected non-datetime result");
26153    if (data.second != "3") throw std::runtime_error("Expected label '3', got '" + data.second + "'");
26154    std::cout << "PASSED" << std::endl;
26155}
26156
26157void pd_test_idxmax_min_typed_regular_min() {
26158    std::cout << "  pd_test_idxmax_min_typed_regular_min: ";
26159    ::pandas::Series<::numpy::float64> s({3.0, 1.0, 2.0, 5.0, 4.0});
26160    auto [is_dt, data] = s.idxmin_typed();
26161    if (is_dt) throw std::runtime_error("Expected non-datetime result");
26162    if (data.second != "1") throw std::runtime_error("Expected label '1', got '" + data.second + "'");
26163    std::cout << "PASSED" << std::endl;
26164}
26165
26166void pd_test_idxmax_min_typed_datetime_max() {
26167    std::cout << "  pd_test_idxmax_min_typed_datetime_max: ";
26168    // Create DatetimeIndex with 3 timestamps, values [1,3,2] => max at pos 1
26169    ::pandas::Series<::numpy::float64> s({1.0, 3.0, 2.0});
26170    std::vector<::numpy::datetime64> dates;
iloc (pd_test_1_all.cpp:19149)
19139            pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141            // Positional indexing
19142            bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143            if (!passed) {
19144                std::cout << "  [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145                throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146            }
19147
19148            // iloc slice
19149            auto slice = s.iloc(1, 4);
19150            passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151            if (!passed) {
19152                std::cout << "  [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153                throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154            }
19155
19156            // iloc with indices
19157            auto selected = s.iloc({0, 2, 4});
19158            passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159            if (!passed) {
iloc (pd_test_1_all.cpp:19149)
19139            pandas::Series<int> s({10, 20, 30, 40, 50});
19140
19141            // Positional indexing
19142            bool passed = s[0] == 10 && s[4] == 50 && s.at(2) == 30;
19143            if (!passed) {
19144                std::cout << "  [FAIL] : in pd_test_series_indexing() : positional indexing failed" << std::endl;
19145                throw std::runtime_error("pd_test_series_indexing failed: positional indexing");
19146            }
19147
19148            // iloc slice
19149            auto slice = s.iloc(1, 4);
19150            passed = slice.size() == 3 && slice[0] == 20 && slice[2] == 40;
19151            if (!passed) {
19152                std::cout << "  [FAIL] : in pd_test_series_indexing() : iloc slice failed" << std::endl;
19153                throw std::runtime_error("pd_test_series_indexing failed: iloc slice");
19154            }
19155
19156            // iloc with indices
19157            auto selected = s.iloc({0, 2, 4});
19158            passed = selected.size() == 3 && selected[0] == 10 && selected[1] == 30 && selected[2] == 50;
19159            if (!passed) {
last (pd_test_1_all.cpp:11617)
11607        void pd_test_groupby_first_last() {
11608            std::cout << "========= GroupBy first/last ====================";
11609
11610            std::map<std::string, std::vector<double>> data = {
11611                {"category", {1.0, 1.0, 2.0, 2.0}},
11612                {"value", {10.0, 20.0, 30.0, 40.0}}
11613            };
11614            pandas::DataFrame df(data);
11615
11616            auto first_result = df.groupby("category").first();
11617            auto last_result = df.groupby("category").last();
11618
11619            // First for group 1: 10, group 2: 30
11620            // Last for group 1: 20, group 2: 40
11621            double first1 = std::stod(first_result["value"].get_value_str(0));
11622            double first2 = std::stod(first_result["value"].get_value_str(1));
11623
11624            bool passed = ((std::abs(first1 - 10.0) < 0.001 && std::abs(first2 - 30.0) < 0.001) ||
11625                          (std::abs(first1 - 30.0) < 0.001 && std::abs(first2 - 10.0) < 0.001));
11626            if (!passed) {
11627                std::cout << "  [FAIL] : in pd_test_groupby_first_last() : first values incorrect" << std::endl;
last_valid_index (pd_test_1_all.cpp:20579)
20570            std::vector<double> values = {
20571                1.0,
20572                2.0,
20573                3.0,
20574                std::numeric_limits<double>::quiet_NaN(),
20575                std::numeric_limits<double>::quiet_NaN()
20576            };
20577            pandas::Series<double> s(values, "test");
20578
20579            auto last_idx = s.last_valid_index();
20580
20581            bool passed = last_idx.has_value() && last_idx.value() == 2;
20582
20583            if (!passed) {
20584                std::cout << "  [FAIL] : in pd_test_timeseries_last_valid_index() : expected index 2" << std::endl;
20585                throw std::runtime_error("pd_test_timeseries_last_valid_index failed");
20586            }
20587
20588            std::cout << " -> tests passed" << std::endl;
20589        }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc (pd_test_3_all.cpp:10916)
10906        {{"A","A","B"}, {"x","y","x"}});
10907    df.set_index(mi);
10908    if (!df.has_multiindex() || df.multiindex().nlevels() != 2) {
10909        std::cout << "  [FAIL] : in pd_test_3_all_set_index_multiindex()" << std::endl;
10910        throw std::runtime_error("set_index MultiIndex failed");
10911    }
10912    std::cout << " -> tests passed" << std::endl;
10913}
10914
10915void pd_test_3_all_loc_single_arg() {
10916    std::cout << "========= Fix 3: loc(string) MultiIndex =================";
10917    pandas::DataFrame df;
10918    df.add_column<int64_t>("val", {10, 20, 30, 40});
10919    auto mi = pandas::MultiIndex::from_arrays<std::string>(
10920        {{"London","London","Paris","Paris"}, {"2020","2021","2020","2021"}});
10921    df.set_multiindex(mi);
10922    pandas::DataFrame result = df.loc("London");
10923    if (result.nrows() != 2) {
10924        std::cout << "  [FAIL] : in pd_test_3_all_loc_single_arg() : expected 2 rows" << std::endl;
10925        throw std::runtime_error("loc single-arg failed");
10926    }
loc_slice (pd_test_5_all.cpp:92767)
92758void case_13_loc_slice_none_bounds() {
92759    // start=None, stop=None, step=+1: full copy.
92760    std::cout << "-- case_13_loc_slice_none_bounds()\n";
92761    int local_fail = 0;
92762    auto s = make_f64_series_5();
92763    pandas::SliceSpec spec;
92764    spec.start = std::monostate{};
92765    spec.stop = std::monostate{};
92766    spec.step = 1;
92767    auto result = s.loc_slice(spec);
92768    pandas_tests::check(result.size() == 5,
92769        "f_test_25_loc_slice_none_bounds_310099.size_eq_5", local_fail);
92770    pandas_tests::check(result.iat(0) == 1.0,
92771        "f_test_25_loc_slice_none_bounds_310099.iat0_eq_1", local_fail);
92772    pandas_tests::check(result.iat(4) == 5.0,
92773        "f_test_25_loc_slice_none_bounds_310099.iat4_eq_5", local_fail);
92774}
92775
92776void case_14_loc_slice_int_bounds() {
92777    // start=int(1), stop=int(4), step=+1: positions [1,4) -> 3 elements.
loc_string (pd_test_5_all.cpp:90637)
90627        throw std::runtime_error(case_label + " failed");
90628    }
90629    std::cout << "  " << case_label << " -> OK\n";
90630}
90631
90632void case_1_plain_string_index_exact() {
90633    std::cout << "-- case_1_plain_string_index_exact\n";
90634    auto s = make_float_series_with_str_index({10.0, 20.0, 30.0},
90635                                              {"a", "b", "c"});
90636    int local_fail = 0;
90637    pandas::StringLookupResult r = s.loc_string("b");
90638    pandas_tests::check(r.kind == pandas::StringLookupResult::Kind::SCALAR,
90639                        "case_1.kind_is_SCALAR", local_fail);
90640    pandas_tests::check(r.scalar_index == 1,
90641                        "case_1.scalar_index_eq_1", local_fail);
90642    pandas_tests::check(r.range == std::make_pair<size_t, size_t>(0, 0),
90643                        "case_1.range_unused_default", local_fail);
90644    finish_case("case_1_plain_string_index_exact", local_fail);
90645}
90646
90647void case_2_plain_string_index_miss() {
loc_timedelta (pd_test_5_all.cpp:92739)
92729        "f_test_25_loc_timestamp_edge_first_996311.kind_scalar", local_fail);
92730    pandas_tests::check(r.scalar == 10.0,
92731        "f_test_25_loc_timestamp_edge_first_996311.scalar_eq_10", local_fail);
92732}
92733
92734void case_11_loc_timedelta_exact_hit() {
92735    std::cout << "-- case_11_loc_timedelta_exact_hit()\n";
92736    int local_fail = 0;
92737    auto s = make_tdi_f64_series_3();
92738    pandas::Timedelta key(2, 0);  // 2 days
92739    auto r = s.loc_timedelta(key);
92740    pandas_tests::check(
92741        r.kind == pandas::LookupResult<numpy::float64>::Kind::SCALAR,
92742        "f_test_25_loc_timedelta_exact_hit_421003.kind_scalar", local_fail);
92743    pandas_tests::check(r.scalar == 200.0,
92744        "f_test_25_loc_timedelta_exact_hit_421003.scalar_eq_200", local_fail);
92745}
92746
92747void case_12_loc_timedelta_not_found() {
92748    std::cout << "-- case_12_loc_timedelta_not_found()\n";
92749    int local_fail = 0;
loc_timestamp (pd_test_5_all.cpp:92701)
92691    pandas_tests::check(threw,
92692        "f_test_25_filter_bool_series_mismatch_throws_172339.value_error",
92693        local_fail);
92694}
92695
92696void case_8_loc_timestamp_exact_hit() {
92697    std::cout << "-- case_8_loc_timestamp_exact_hit()\n";
92698    int local_fail = 0;
92699    auto s = make_dti_f64_series_3();
92700    pandas::Timestamp key(2024, 1, 2, 0, 0, 0);
92701    auto r = s.loc_timestamp(key);
92702    pandas_tests::check(
92703        r.kind == pandas::LookupResult<numpy::float64>::Kind::SCALAR,
92704        "f_test_25_loc_timestamp_exact_hit_318227.kind_scalar", local_fail);
92705    pandas_tests::check(r.scalar == 20.0,
92706        "f_test_25_loc_timestamp_exact_hit_318227.scalar_eq_20", local_fail);
92707}
92708
92709void case_9_loc_timestamp_not_found() {
92710    std::cout << "-- case_9_loc_timestamp_not_found()\n";
92711    int local_fail = 0;
mask (pd_test_1_all.cpp:9119)
9110void pd_test_datetime_mixin_array_constructor() {
9111    std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113    // Create DatetimeArray with some values
9114    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9116    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9117    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9118
9119    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120    mask.setElementAt({0}, numpy::bool_(false));
9121    mask.setElementAt({1}, numpy::bool_(false));
9122    mask.setElementAt({2}, numpy::bool_(false));
9123
9124    pandas::DatetimeArray arr(data, mask);
9125    pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127    bool passed = (idx.size() == 3 && !idx.empty() &&
9128                   idx.name().has_value() && *idx.name() == "timestamps" &&
9129                   idx.inferred_type() == "datetime");
mask (pd_test_1_all.cpp:9119)
9110void pd_test_datetime_mixin_array_constructor() {
9111    std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113    // Create DatetimeArray with some values
9114    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9116    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9117    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9118
9119    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120    mask.setElementAt({0}, numpy::bool_(false));
9121    mask.setElementAt({1}, numpy::bool_(false));
9122    mask.setElementAt({2}, numpy::bool_(false));
9123
9124    pandas::DatetimeArray arr(data, mask);
9125    pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127    bool passed = (idx.size() == 3 && !idx.empty() &&
9128                   idx.name().has_value() && *idx.name() == "timestamps" &&
9129                   idx.inferred_type() == "datetime");
mask_at (pd_test_3_all.cpp:27712)
27702        fail++;
27703    } else {
27704        if (bool_s->dtype_name() != "boolean") {
27705            std::cout << "    FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27706            fail++;
27707        }
27708        if (!bool_s->has_mask()) {
27709            std::cout << "    FAIL: should have mask for NA" << std::endl;
27710            fail++;
27711        } else {
27712            if (!bool_s->mask_at(2)) {
27713                std::cout << "    FAIL: position 2 should be masked (NA)" << std::endl;
27714                fail++;
27715            }
27716        }
27717    }
27718
27719    if (fail == 0) std::cout << "    OK" << std::endl;
27720}
27721
27722void pd_test_astype_to_string() {
nlargest (pd_test_1_all.cpp:6425)
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
6419            if (std::stod(first_val) != 5.0) {
6420                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values desc first != 5" << std::endl;
6421                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values desc first != 5");
6422            }
6423
6424            // Test nlargest
6425            auto largest = df.nlargest(2, "A");
6426            if (largest.nrows() != 2) {
6427                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428                throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429            }
6430
6431            // Test nsmallest
6432            auto smallest = df.nsmallest(2, "A");
6433            if (smallest.nrows() != 2) {
6434                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435                throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
nsmallest (pd_test_1_all.cpp:6432)
6422            }
6423
6424            // Test nlargest
6425            auto largest = df.nlargest(2, "A");
6426            if (largest.nrows() != 2) {
6427                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nlargest nrows != 2" << std::endl;
6428                throw std::runtime_error("pd_test_dataframe_sorting failed: nlargest nrows != 2");
6429            }
6430
6431            // Test nsmallest
6432            auto smallest = df.nsmallest(2, "A");
6433            if (smallest.nrows() != 2) {
6434                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : nsmallest nrows != 2" << std::endl;
6435                throw std::runtime_error("pd_test_dataframe_sorting failed: nsmallest nrows != 2");
6436            }
6437
6438            std::cout << " -> tests passed" << std::endl;
6439        }
6440
6441        // =====================================================================
6442        // Test: Rank
sample (pd_test_3_all.cpp:207)
197        if (df.index().dtype_name() != "int64") {
198            std::cout << "  [FAIL] : in pd_test_3_all_dtype_typing_overloads() : cookbook_105 integration dtype" << std::endl;
199            throw std::runtime_error("pd_test_3_all_dtype_typing_overloads failed: cookbook_105 integration dtype");
200        }
201    }
202
203    std::cout << " -> tests passed" << std::endl;
204}
205
206void pd_test_3_all_sample() {
207    std::cout << "========= DataFrame.sample() =======================";
208
209    std::map<std::string, std::vector<double>> data = {
210        {"A", {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}},
211        {"B", {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}}
212    };
213    pandas::DataFrame df(data);
214
215    // Sample 3 rows
216    pandas::DataFrame result = df.sample(3, 42);  // seed=42 for reproducibility
set_value_nan (pd_test_5_all.cpp:18478)
18468        "0    a\n"
18469        "1  NaN\n"
18470        "2    c";
18471    bool ok = (actual == expected);
18472    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_10_str_col_where_default.to_string", local_fail);
18473    if (!ok) dump_diff("case_10", expected, actual);
18474}
18475
18476void where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN(int& local_fail) {
18477    pandas::Series<std::int64_t> s({10, 20, 30});
18478    s.set_value_nan(0);
18479
18480    std::string actual = s.get_value_str(0);
18481    std::string expected = "NaN";
18482    bool ok = (actual == expected);
18483    pandas_tests::check(ok, "where_mask_dtype_promotion_2_503514_case_11_get_value_str_mask_int_renders_NaN (got " +
18484          actual + ")", local_fail);
18485
18486    bool ok1 = (s.get_value_str(1) == "20");
18487    bool ok2 = (s.get_value_str(2) == "30");
18488    pandas_tests::check(ok1, "case_11.kept_idx1_eq_20", local_fail);
set_value_with_enlarge (pd_test_3_all.cpp:29152)
29142static int sm_check(bool cond, const char* msg) {
29143    if (!cond) { std::cout << "    FAIL: " << msg << std::endl; return 1; }
29144    return 0;
29145}
29146
29147void pd_test_setitem_multicolumn_series_enlarge_int() {
29148    std::cout << "  -- pd_test_setitem_multicolumn_series_enlarge_int --" << std::endl;
29149    int fail = 0;
29150    std::vector<numpy::float64> v = {1.0, 2.0, 3.0};
29151    pandas::Series<numpy::float64> s(v);
29152    bool grew = s.set_value_with_enlarge("5", 99.0);
29153    fail += sm_check(grew, "enlargement reported");
29154    fail += sm_check(s.size() == 4, "size grew to 4");
29155    fail += sm_check(s[3] == 99.0, "appended value");
29156    auto k = s.keys();
29157    fail += sm_check(k.size() == 4 && k[3] == "5", "label appended");
29158    if (fail == 0) std::cout << "    OK" << std::endl;
29159    if (fail != 0) throw std::runtime_error("pd_test_setitem_multicolumn_series_enlarge_int failed");
29160}
29161
29162void pd_test_setitem_multicolumn_series_enlarge_string() {
tail (pd_test_1_all.cpp:6308)
6298            pandas::DataFrame df(data);
6299
6300            // Test head
6301            auto head_df = df.head(3);
6302            if (head_df.nrows() != 3) {
6303                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : head(3) nrows != 3" << std::endl;
6304                throw std::runtime_error("pd_test_dataframe_indexing failed: head(3) nrows != 3");
6305            }
6306
6307            // Test tail
6308            auto tail_df = df.tail(2);
6309            if (tail_df.nrows() != 2) {
6310                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : tail(2) nrows != 2" << std::endl;
6311                throw std::runtime_error("pd_test_dataframe_indexing failed: tail(2) nrows != 2");
6312            }
6313
6314            // Test iloc_rows range
6315            auto slice = df.iloc_rows(1, 4);
6316            if (slice.nrows() != 3) {
6317                std::cout << "  [FAIL] : in pd_test_dataframe_indexing() : iloc_rows(1,4) nrows != 3" << std::endl;
6318                throw std::runtime_error("pd_test_dataframe_indexing failed: iloc_rows(1,4) nrows != 3");
take (pd_test_1_all.cpp:5903)
5893// Inherited Operations Tests
5894// ============================================================================
5895
5896void pd_test_categorical_index_take() {
5897    std::cout << "========= inherited take ==============================";
5898
5899    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5900    pandas::CategoricalIndex idx(arr);
5901
5902    std::vector<size_t> indices = {0, 2, 3};
5903    pandas::ExtensionIndex<pandas::CategoricalArray> taken = idx.take(indices);
5904
5905    bool passed = (taken.size() == 3);
5906    if (!passed) {
5907        std::cout << "  [FAIL] : in pd_test_categorical_index_take()" << std::endl;
5908        throw std::runtime_error("pd_test_categorical_index_take failed");
5909    }
5910
5911    std::cout << " -> tests passed" << std::endl;
5912}
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where (pd_test_1_all.cpp:22018)
22008            data["B"] = {5.0, 6.0, 7.0, 8.0};
22009            pandas::DataFrame df(data);
22010
22011            // Create condition DataFrame (values > 2)
22012            std::map<std::string, std::vector<numpy::bool_>> cond_data;
22013            cond_data["A"] = {false, false, true, true};   // 1<=2, 2<=2, 3>2, 4>2
22014            cond_data["B"] = {true, true, true, true};     // all >2
22015            pandas::DataFrame cond(cond_data);
22016
22017            // Apply where with replacement value -1
22018            pandas::DataFrame result = df.where(cond, -1.0);
22019
22020            // Get column index for A - it's sorted alphabetically in std::map
22021            size_t col_a_idx = df.get_column_index("A");
22022            size_t col_b_idx = df.get_column_index("B");
22023
22024            bool passed = true;
22025            std::string error_msg;
22026
22027            // Check A column values
22028            std::string a0 = result.iat<double>(0, col_a_idx) == -1.0 ? "ok" : "fail";
where_resolved_typed (pd_test_5_all.cpp:144251)
144241    auto s = mk_str({"a", "b", "c"}, {"0", "1", "2"});
144242    auto r = s.fillna_resolved(pandas::FillValue::of_bool(true));
144243    check_dtype_eq("C_26_case_70_fillna_bool_string()_dtype",
144244                   result_series_dtype_full(r), "object", local_fail);
144245}
144246
144247void case_71_where_int_int_other(int& local_fail) {
144248    std::cout << "-- case_71_where_int_int_other\n";
144249    auto s = mk_f64({10, 20, 30}, {"0", "1", "2"}, "int64");
144250    auto cond = mk_cond_tft();
144251    auto r = s.where_resolved_typed(cond, pandas::FillValue::of_numeric_int(-1.0));
144252    check_dtype_eq("C_26_case_71_where_int_int_other()_dtype",
144253                   result_series_dtype_full(r), "int64", local_fail);
144254}
144255
144256void case_72_where_int_float_other(int& local_fail) {
144257    std::cout << "-- case_72_where_int_float_other\n";
144258    auto s = mk_f64({10, 20, 30}, {"0", "1", "2"}, "int64");
144259    auto cond = mk_cond_tft();
144260    auto r = s.where_resolved_typed(cond, pandas::FillValue::of_numeric(0.5));
144261    check_dtype_eq("C_26_case_72_where_int_float_other()_dtype",
xs (pd_test_2_all.cpp:18668)
18659        std::cout << "====================================== [OK] pd_test_tz_localize test suite ========================== " << std::endl;
18660        return 0;
18661    }
18662
18663} // namespace dataframe_tests
18664// ------------------- pd_test_tz_localize.cpp (end) -----------------------------
18665
18666// ------------------- pd_test_xs.cpp (start) -----------------------------
18667// dataframe_tests/pd_test_xs.cpp
18668// Tests for DataFrame xs() (cross-section) implementation
18669
18670#include <iostream>
18671#include <stdexcept>
18672#include <vector>
18673#include <string>
18674#include <map>
18675
18676#include "../pandas/pd_dataframe.h"
18677
18678// CRITICAL: No using namespace directives
drop (pd_test_1_all.cpp:6558)
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551            }
6552            if (!popped) {
6553                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554                throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555            }
6556
6557            // Test drop columns
6558            auto dropped = df.drop(std::vector<std::string>{"B"}, 1);
6559            if (dropped.ncols() != 1) {
6560                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop ncols != 1" << std::endl;
6561                throw std::runtime_error("pd_test_dataframe_manipulation failed: drop ncols != 1");
6562            }
6563
6564            // Test rename
6565            auto renamed = df.rename_columns(std::map<std::string, std::string>{{"A", "X"}});
6566            if (!renamed.has_column("X")) {
6567                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : rename failed" << std::endl;
6568                throw std::runtime_error("pd_test_dataframe_manipulation failed: rename failed");
drop_duplicates (pd_test_1_all.cpp:6639)
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
6634                std::map<std::string, std::vector<numpy::int64>> dup_data;
6635                dup_data["A"] = {1, 1, 2, 2};
6636                dup_data["B"] = {1, 1, 2, 3};
6637                pandas::DataFrame df_dup(dup_data);
6638
6639                auto deduped = df_dup.drop_duplicates();
6640                // Rows 0 and 1 are duplicates (A=1, B=1), so should have 3 rows
6641                if (deduped.nrows() != 3) {
6642                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : drop_duplicates nrows != 3, got " << deduped.nrows() << std::endl;
6643                    throw std::runtime_error("pd_test_dataframe_manipulation failed: drop_duplicates");
6644                }
6645            }
6646
6647            // Test assign
6648            {
6649                std::map<std::string, std::vector<numpy::int64>> assign_data;
droplevel (pd_test_1_all.cpp:14428)
14418        void pd_test_multiindex_droplevel() {
14419            std::cout << "========= droplevel =================================== ";
14420
14421            std::vector<std::vector<std::string>> arrays = {
14422                {"a", "a", "b"},
14423                {"x", "y", "z"},
14424                {"1", "2", "3"}
14425            };
14426
14427            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14428            pandas::MultiIndex dropped = mi.droplevel(1);
14429
14430            bool passed = true;
14431
14432            if (dropped.nlevels() != 2) {
14433                std::cout << "  [FAIL] : nlevels should be 2 after drop" << std::endl;
14434                passed = false;
14435            }
14436
14437            // Check remaining levels
14438            auto tup = dropped[0];
dropna (pd_test_1_all.cpp:531)
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
pop (pd_test_1_all.cpp:6547)
6537            pandas::DataFrame df(data);
6538
6539            // Test add_column
6540            df.add_column<numpy::int64>("C", {7, 8, 9});
6541            if (df.ncols() != 3) {
6542                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543                throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544            }
6545
6546            // Test pop
6547            auto popped = df.pop("C");
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551            }
6552            if (!popped) {
6553                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554                throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555            }
6556
6557            // Test drop columns
pop (pd_test_1_all.cpp:6547)
6537            pandas::DataFrame df(data);
6538
6539            // Test add_column
6540            df.add_column<numpy::int64>("C", {7, 8, 9});
6541            if (df.ncols() != 3) {
6542                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : add_column ncols != 3" << std::endl;
6543                throw std::runtime_error("pd_test_dataframe_manipulation failed: add_column ncols != 3");
6544            }
6545
6546            // Test pop
6547            auto popped = df.pop("C");
6548            if (df.ncols() != 2) {
6549                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : pop ncols != 2" << std::endl;
6550                throw std::runtime_error("pd_test_dataframe_manipulation failed: pop ncols != 2");
6551            }
6552            if (!popped) {
6553                std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : popped is null" << std::endl;
6554                throw std::runtime_error("pd_test_dataframe_manipulation failed: popped is null");
6555            }
6556
6557            // Test drop columns
reindex (pd_test_1_all.cpp:6708)
6698                }
6699            }
6700
6701            // Test reindex rows
6702            {
6703                std::map<std::string, std::vector<double>> data;
6704                data["A"] = {1.0, 2.0, 3.0};
6705                pandas::DataFrame df(data);
6706                df = df.set_axis({"x", "y", "z"}, 0);
6707
6708                auto reindexed = df.reindex({"x", "z", "w"}, 0);
6709                if (reindexed.nrows() != 3) {
6710                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex wrong nrows" << std::endl;
6711                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex nrows");
6712                }
6713                // 'w' should have NaN
6714                std::string val = reindexed["A"].get_value_str(2);
6715                if (!std::isnan(std::stod(val))) {
6716                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : missing label should be NaN" << std::endl;
6717                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex NaN");
6718                }
reindex_dispatch (pd_test_5_all.cpp:61046)
61036    for (size_t i = 0; i < n; ++i) out.push_back(std::to_string(i));
61037    return out;
61038}
61039
61040template <typename T>
61041static std::pair<std::string, std::string>
61042run_numeric_fill_lt29(const std::vector<T>& vals,
61043                      const std::vector<std::string>& new_idx,
61044                      double fill) {
61045    auto s = make_series_with_idx_lt29<T>(vals, src_idx_for_lt29(vals.size()));
61046    pandas::Result r = s.reindex_dispatch(
61047        new_idx, "", pandas::FillValue::of_numeric(fill));
61048
61049    if (std::holds_alternative<
61050            std::unique_ptr<pandas::Series<std::string>>>(r.value)) {
61051        auto& sp = std::get<
61052            std::unique_ptr<pandas::Series<std::string>>>(r.value);
61053        auto df = sp->to_frame(std::optional<std::string>("v"));
61054        auto dts = df.dtypes();
61055        return {df.to_string(),
61056                dts.empty() ? std::string("<no col>") : dts[0]};
reindex_like (pd_test_1_all.cpp:6777)
6767                data1["A"] = {1, 2};
6768                data1["B"] = {3, 4};
6769                pandas::DataFrame df1(data1);
6770
6771                std::map<std::string, std::vector<int>> data2;
6772                data2["B"] = {10, 20, 30};
6773                data2["C"] = {40, 50, 60};
6774                pandas::DataFrame df2(data2);
6775                df2 = df2.set_axis({"x", "y", "z"}, 0);
6776
6777                auto reindexed = df1.reindex_like(df2);
6778                if (reindexed.nrows() != 3 || reindexed.ncols() != 2) {
6779                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : reindex_like wrong shape" << std::endl;
6780                    throw std::runtime_error("pd_test_dataframe_index_ops failed: reindex_like");
6781                }
6782            }
6783
6784            std::cout << " -> tests passed" << std::endl;
6785        }
6786
6787        // =====================================================================
reindex_promote_object_bool_fill (pd_test_5_all.cpp:43334)
43324               df, actual, expected, "object", local_fail);
43325}
43326
43327void f_dtype_object_collapse_groupby_314827_case_13_series_float64_reindex_float_fill_one(int& local_fail) {
43328    std::cout << "-- case_13_series_float64_reindex_float_fill_one\n";
43329    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0});
43330    s.set_index(std::make_unique<pandas::Index<std::string>>(
43331        std::vector<std::string>{"a", "b", "c"}));
43332    // c2: invoke the new bool-promote overload directly (mirrors what the
43333    // PandasPython binding now does for a Python bool fill_value).
43334    auto r = s.reindex_promote_object_bool_fill({"a", "b", "z"}, /*method=*/"",
43335                                                /*bool_fill_value=*/true);
43336    pandas::DataFrame df = r.to_frame(std::optional<std::string>("v"));
43337    std::string actual = df.to_string();
43338
43339    std::cout << "    nrows=" << df.nrows() << " ncols=" << df.ncols() << "\n";
43340
43341    std::string expected =
43342        "      v\n"
43343        "a   1.0\n"
43344        "b   2.0\n"
reindex_with_fill (pd_test_5_all.cpp:97806)
97796            // by recording a pass: we don't fail the suite for a cell that
97797            // can't even be set up.
97798            pandas_tests::check(true, tag + " [override-rejected-by-storage]", local_fail);
97799            return;
97800        }
97801    }
97802    std::vector<std::string> target_keys = has_missing
97803        ? std::vector<std::string>{"a", "b", "z"}
97804        : std::vector<std::string>{"a", "b", "c"};
97805    try {
97806        pandas::Result r = s.reindex_with_fill(
97807            target_keys, std::string(""), fv,
97808            std::nullopt, std::nullopt, {}, {});
97809        Probe p = probe_result(r);
97810        pandas_tests::check(p.in_whitelist,
97811            tag + " [variant in apply_override_to_result whitelist]",
97812            local_fail);
97813        pandas_tests::check_str(tag + " [result_override]",
97814            expected_override, p.result_override, local_fail);
97815    } catch (const std::exception& e) {
97816        pandas_tests::check(false,
reindex_with_indexer (pd_test_5_all.cpp:40388)
40378    s.set_dtype_override("boolean");
40379    s.set_freq(std::optional<std::string>("D"));
40380    s.set_string_na_sentinel_disabled(true);
40381
40382    // Indexer: identity over the 3 source positions.
40383    numpy::NDArray<numpy::int64> indexer(std::vector<size_t>{3});
40384    indexer.setElementAt({0}, 0);
40385    indexer.setElementAt({1}, 1);
40386    indexer.setElementAt({2}, 2);
40387
40388    auto base = s.reindex_with_indexer(indexer);
40389    pandas_tests::check(base != nullptr, "case7.reindex_with_indexer_nonnull", local_fail);
40390    if (!base) return;
40391
40392    auto* r = dynamic_cast<pandas::Series<std::int64_t>*>(base.get());
40393    pandas_tests::check(r != nullptr, "case7.reindex_with_indexer_is_Series_int64",
40394                        local_fail);
40395    if (!r) return;
40396
40397    // dtype_override propagates (oracle says yes).
40398    pandas_tests::check(r->dtype_override().has_value() &&
rename (pd_test_1_all.cpp:5816)
5806    std::cout << " -> tests passed" << std::endl;
5807}
5808
5809void pd_test_categorical_index_rename() {
5810    std::cout << "========= rename ======================================";
5811
5812    pandas::CategoricalArray arr({"x", "y"});
5813    pandas::CategoricalIndex idx(arr, "old_name");
5814
5815    pandas::CategoricalIndex renamed = idx.rename("new_name");
5816
5817    bool passed = (renamed.name().has_value() && *renamed.name() == "new_name" &&
5818                   renamed.size() == idx.size() && renamed.categories() == idx.categories());
5819    if (!passed) {
5820        std::cout << "  [FAIL] : in pd_test_categorical_index_rename()" << std::endl;
5821        throw std::runtime_error("pd_test_categorical_index_rename failed");
5822    }
5823
5824    std::cout << " -> tests passed" << std::endl;
5825}
rename_axis (pd_test_1_all.cpp:6760)
6750                    throw std::runtime_error("pd_test_dataframe_index_ops failed: get_optional Z");
6751                }
6752            }
6753
6754            // Test rename_axis
6755            {
6756                std::map<std::string, std::vector<int>> data;
6757                data["A"] = {1, 2, 3};
6758                pandas::DataFrame df(data);
6759
6760                auto renamed = df.rename_axis("my_index", 0);
6761                // Should not throw
6762            }
6763
6764            // Test reindex_like
6765            {
6766                std::map<std::string, std::vector<int>> data1;
6767                data1["A"] = {1, 2};
6768                data1["B"] = {3, 4};
6769                pandas::DataFrame df1(data1);
reorder_levels (pd_test_1_all.cpp:14495)
14485        void pd_test_multiindex_reorder_levels() {
14486            std::cout << "========= reorder_levels ============================== ";
14487
14488            std::vector<std::vector<std::string>> arrays = {
14489                {"a", "b"},
14490                {"x", "y"},
14491                {"1", "2"}
14492            };
14493
14494            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays);
14495            pandas::MultiIndex reordered = mi.reorder_levels({2, 0, 1});
14496
14497            bool passed = true;
14498
14499            auto tup = reordered[0];
14500            if (tup[0] != "1" || tup[1] != "a" || tup[2] != "x") {
14501                std::cout << "  [FAIL] : reordered tuple should be ('1', 'a', 'x')" << std::endl;
14502                passed = false;
14503            }
14504
14505            if (!passed) {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
replace (pd_test_1_all.cpp:6623)
6613                }
6614            }
6615
6616            // Test replace
6617            {
6618                std::map<std::string, std::vector<numpy::float64>> float_data;
6619                float_data["X"] = {1.0, 2.0, 3.0};
6620                float_data["Y"] = {2.0, 2.0, 4.0};
6621                pandas::DataFrame df_repl(float_data);
6622
6623                auto replaced = df_repl.replace(2.0, 99.0);
6624                // Check some value was replaced (crude check via string)
6625                std::string val_str = replaced.col<numpy::float64>("X").get_value_str(1);
6626                if (val_str.find("99") == std::string::npos) {
6627                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : replace didn't work" << std::endl;
6628                    throw std::runtime_error("pd_test_dataframe_manipulation failed: replace");
6629                }
6630            }
6631
6632            // Test drop_duplicates
6633            {
reset_index (pd_test_3_all.cpp:1618)
1608    }
1609
1610    std::cout << " -> tests passed" << std::endl;
1611}
1612
1613// ============================================================================
1614// Category 10: Remaining Untested Functions
1615// ============================================================================
1616
1617void pd_test_3_all_series_reset_index() {
1618    std::cout << "========= Series.reset_index() =======================";
1619
1620    std::vector<double> vals = {10.0, 20.0, 30.0};
1621    pandas::Series<double> s(vals, "test");
1622
1623    // Set a custom index
1624    pandas::Index<std::string> custom_idx({"a", "b", "c"});
1625    s.set_index(custom_idx);
1626
1627    // Reset the index
1628    s.reset_index(true);  // drop=true
set_axis (pd_test_1_all.cpp:6673)
6663            std::cout << " -> tests passed" << std::endl;
6664        }
6665
6666        // =====================================================================
6667        // Test: Index Operations
6668        // =====================================================================
6669        void pd_test_dataframe_index_ops() {
6670            std::cout << "========= index operations =================";
6671
6672            // Test set_axis (rows)
6673            {
6674                std::map<std::string, std::vector<int>> data;
6675                data["A"] = {1, 2, 3};
6676                pandas::DataFrame df(data);
6677
6678                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6679                std::string idx0 = renamed.index().get_value_str(0);
6680                if (idx0 != "x") {
6681                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6682                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
set_index (pd_test_1_all.cpp:20318)
20308            // Set datetime index
20309            std::vector<std::string> dates = {
20310                "2020-01-01 00:00:00",
20311                "2020-01-01 12:00:00",
20312                "2020-01-02 00:00:00",
20313                "2020-01-02 12:00:00",
20314                "2020-01-03 00:00:00",
20315                "2020-01-03 12:00:00"
20316            };
20317            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20318
20319            // Resample to daily
20320            auto resampler = df.resample("D");
20321            pandas::DataFrame result = resampler.sum();
20322
20323            // Check that we got aggregated results
20324            bool passed = (result.nrows() <= df.nrows());
20325
20326            if (!passed) {
20327                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
swaplevel (pd_test_1_all.cpp:14461)
14451        void pd_test_multiindex_swaplevel() {
14452            std::cout << "========= swaplevel =================================== ";
14453
14454            std::vector<std::vector<std::string>> arrays = {
14455                {"a", "b"},
14456                {"x", "y"}
14457            };
14458            std::vector<std::optional<std::string>> names = {"first", "second"};
14459
14460            pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
14461            pandas::MultiIndex swapped = mi.swaplevel(0, 1);
14462
14463            bool passed = true;
14464
14465            // Tuple should be reversed
14466            auto tup = swapped[0];
14467            if (tup[0] != "x" || tup[1] != "a") {
14468                std::cout << "  [FAIL] : swapped tuple should be ('x', 'a')" << std::endl;
14469                passed = false;
14470            }
update (pd_test_1_all.cpp:13945)
13935            if (!result.has_column("C")) {
13936                passed = false;
13937                std::cout << "  [FAIL] : in pd_test_joining_combine_first() : missing column C" << std::endl;
13938                throw std::runtime_error("pd_test_joining_combine_first failed: col C missing");
13939            }
13940
13941            std::cout << " -> tests passed" << std::endl;
13942        }
13943
13944        // =====================================================================
13945        // update() Tests
13946        // =====================================================================
13947
13948        void pd_test_joining_update() {
13949            std::cout << "========= update ======================================";
13950
13951            std::map<std::string, std::vector<double>> left_data = {
13952                {"A", {1.0, 2.0, 3.0}},
13953                {"B", {10.0, 20.0, 30.0}}
13954            };
13955            std::vector<std::string> left_idx = {"x", "y", "z"};
backfill (pd_test_3_all.cpp:2645)
2635void pd_test_3_all_df_backfill_pad() {
2636    std::cout << "========= DataFrame.backfill/pad() =======================";
2637
2638    std::map<std::string, std::vector<double>> data = {
2639        {"A", {1.0, std::nan(""), std::nan(""), 4.0}},
2640        {"B", {std::nan(""), 2.0, std::nan(""), 4.0}}
2641    };
2642    pandas::DataFrame df(data);
2643
2644    // Test backfill (should fill backward)
2645    pandas::DataFrame bfill_result = df.backfill(0);
2646    if (bfill_result.nrows() != 4 || bfill_result.ncols() != 2) {
2647        throw std::runtime_error("backfill shape failed");
2648    }
2649
2650    // Test pad (should fill forward)
2651    pandas::DataFrame pad_result = df.pad(0);
2652    if (pad_result.nrows() != 4 || pad_result.ncols() != 2) {
2653        throw std::runtime_error("pad shape failed");
2654    }
bfill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
ffill (pd_test_1_all.cpp:23603)
23593        std::cout << "====================================== [OK] pd_test_equals test suite ========================== " << std::endl;
23594        return 0;
23595    }
23596
23597} // namespace dataframe_tests
23598// ------------------- pd_test_equals.cpp (end) -----------------------------
23599
23600// ------------------- pd_test_ffill_bfill.cpp (start) -----------------------------
23601// dataframe_tests/pd_test_ffill_bfill.cpp
23602// Test file for DataFrame.ffill() and DataFrame.bfill() methods
23603
23604#include <iostream>
23605#include <stdexcept>
23606#include <cmath>
23607#include <limits>
23608#include <map>
23609#include "../pandas/pd_dataframe.h"
23610
23611// CRITICAL: No using namespace directives
fillna (pd_test_1_all.cpp:537)
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
535        }
536
537        // Test fillna (fill with existing category)
538        pandas::CategoricalArray filled = arr.fillna("a");  // 'a' is in categories
539        if (filled.has_na()) {
540            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : fillna should have no NA" << std::endl;
541            throw std::runtime_error("pd_test_categorical_array_na_handling failed: fillna should have no NA");
542        }
543
544        std::cout << " -> tests passed" << std::endl;
545    }
546
547    void pd_test_categorical_array_add_categories() {
fillna_resolved (pd_test_5_all.cpp:100541)
100531void case_3_object_fillna_nan_no_missing(int& local_fail) {
100532    std::cout << "-- case_3_object_fillna_nan_no_missing\n";
100533    pandas::Series<numpy::object_> s(
100534        std::vector<numpy::object_>{
100535            numpy::object_(std::string("a")),
100536            numpy::object_(std::string("b")),
100537            numpy::object_(std::string("c"))},
100538        std::optional<std::string>{});
100539    pandas::Result r;
100540    try {
100541        r = s.fillna_resolved(pandas::FillValue::nan());
100542    } catch (const std::exception& e) {
100543        std::cout << "  exception: " << e.what() << "\n";
100544    }
100545    pandas_tests::check(r.is_series(),
100546        "C_26m_case_3_object_fillna_nan_no_missing()_is_series", local_fail);
100547    pandas_tests::check(result_object_series_dtype_full(r) == "object",
100548        "C_26m_case_3_object_fillna_nan_no_missing()_dtype_object", local_fail);
100549}
100550
100551void case_4_object_fillna_na_with_sentinel(int& local_fail) {
fillna_string (pd_test_5_all.cpp:47965)
47955                                            "NaT", "null", "<NA>", "x", ""});
47956        auto& col = df["col"];
47957        for (size_t r = 0; r < df.nrows(); ++r) {
47958            std::cout << tag << "  [" << r << "] val=\""
47959                      << col.get_value_str(r) << "\" is_na_at="
47960                      << col.is_na_at(r) << "\n";
47961        }
47962        // CROSS-REFERENCE: pd_series.h:1938 lists only ""/None/nan/NaN as NA
47963        // for Series<std::string>; "NA"/"NaT"/"null"/"<NA>" are NOT treated
47964        // as NA by is_na_at.  This interacts with the fillna bug (item #1):
47965        // fillna_string (pd_series.h:1995) shares the SAME list.
47966    } catch (const std::exception& e) {
47967        std::cout << tag << " exception: " << e.what() << "\n";
47968    }
47969    std::cout << tag << " === end ===\n";
47970}
47971
47972static void P33_forced_object_sentinels() {
47973    const std::string tag = "[P33]";
47974    std::cout << "\n" << tag
47975              << " === dtype='object' with 'NaT'/'null' literals (residual bug?) ===\n";
interpolate (pd_test_1_all.cpp:24365)
24355        std::cout << "====================================== [OK] pd_test_idxmax_idxmin test suite ========================== " << std::endl;
24356        return 0;
24357    }
24358
24359} // namespace dataframe_tests
24360// ------------------- pd_test_idxmax_idxmin.cpp (end) -----------------------------
24361
24362// ------------------- pd_test_interpolate.cpp (start) -----------------------------
24363// dataframe_tests/pd_test_interpolate.cpp
24364// Test file for DataFrame.interpolate() method
24365
24366#include <iostream>
24367#include <stdexcept>
24368#include <cmath>
24369#include <limits>
24370#include <map>
24371#include "../pandas/pd_dataframe.h"
24372
24373// CRITICAL: No using namespace directives
isna (pd_test_1_all.cpp:524)
514            throw std::runtime_error("pd_test_categorical_array_na_handling failed: has_na() should be true");
515        }
516
517        // Test count (non-NA)
518        if (arr.count() != 2) {
519            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : count() != 2" << std::endl;
520            throw std::runtime_error("pd_test_categorical_array_na_handling failed: count() != 2");
521        }
522
523        // Test isna array
524        numpy::NDArray<numpy::bool_> na_mask = arr.isna();
525        if (na_mask.getSize() != 4) {
526            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : isna size != 4" << std::endl;
527            throw std::runtime_error("pd_test_categorical_array_na_handling failed: isna size != 4");
528        }
529
530        // Test dropna
531        pandas::CategoricalArray dropped = arr.dropna();
532        if (dropped.size() != 2) {
533            std::cout << "  [FAIL] : in pd_test_categorical_array_na_handling() : dropna size != 2" << std::endl;
534            throw std::runtime_error("pd_test_categorical_array_na_handling failed: dropna size != 2");
isnull (pd_test_3_all.cpp:671)
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
676    // Index 0: 1.0 -> not null
677    if (isnull_result.getElementAt({0})) {
678        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : index 0 should not be null" << std::endl;
679        throw std::runtime_error("pd_test_3_all_index_null_detection failed: index 0");
680    }
681    // Index 1: NaN -> null
notna (pd_test_1_all.cpp:6595)
6585                if (!na_mask.getElementAt({2, 1})) {
6586                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (2,1) should be true" << std::endl;
6587                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (2,1)");
6588                }
6589                // Row 0, col 0 should NOT be NA
6590                if (na_mask.getElementAt({0, 0})) {
6591                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : isna at (0,0) should be false" << std::endl;
6592                    throw std::runtime_error("pd_test_dataframe_manipulation failed: isna at (0,0)");
6593                }
6594
6595                auto notna_mask = df_na.notna();
6596                if (notna_mask.getElementAt({1, 0})) {
6597                    std::cout << "  [FAIL] : in pd_test_dataframe_manipulation() : notna at (1,0) should be false" << std::endl;
6598                    throw std::runtime_error("pd_test_dataframe_manipulation failed: notna at (1,0)");
6599                }
6600            }
6601
6602            // Test fillna
6603            {
6604                std::map<std::string, std::vector<numpy::float64>> float_data;
6605                float_data["X"] = {1.0, std::nan(""), 3.0};
notnull (pd_test_3_all.cpp:665)
655    }
656
657    std::cout << " -> tests passed" << std::endl;
658}
659
660// ============================================================================
661// Category 5: Index Null Detection
662// ============================================================================
663
664void pd_test_3_all_index_null_detection() {
665    std::cout << "========= Index.isnull/notnull() =====================";
666
667    // Test with float index (can have NaN)
668    std::vector<double> vals = {1.0, std::nan(""), 3.0, std::nan("")};
669    pandas::Index<double> idx(vals);
670
671    numpy::NDArray<numpy::bool_> isnull_result = idx.isnull();
672    if (isnull_result.getSize() != 4) {
673        std::cout << "  [FAIL] : in pd_test_3_all_index_null_detection() : isnull() size mismatch" << std::endl;
674        throw std::runtime_error("pd_test_3_all_index_null_detection failed: isnull() size");
675    }
pad (pd_test_3_all.cpp:1771)
1761    if (result_single.nrows() != 3 || result_single.ncols() != 1) {
1762        std::cout << "  [FAIL] : in pd_test_3_all_dataframe_unstack() : single col shape mismatch" << std::endl;
1763        throw std::runtime_error("pd_test_3_all_dataframe_unstack failed: single col shape");
1764    }
1765
1766    std::cout << " -> tests passed" << std::endl;
1767}
1768
1769void pd_test_3_all_fbbuilder_pad() {
1770    std::cout << "========= FBBuilder.pad() (internal) =================";
1771
1772    // Note: FBBuilder.pad() is an internal method for FlatBuffer serialization
1773    // It's not the pandas DataFrame.pad() method (which is ffill alias)
1774    // This test verifies the to_feather() serialization works, which uses FBBuilder.pad()
1775
1776    std::map<std::string, std::vector<double>> data = {
1777        {"A", {1.0, 2.0, 3.0}},
1778        {"B", {4.0, 5.0, 6.0}}
1779    };
1780    pandas::DataFrame df(data);
count (pd_test_1_all.cpp:66)
56        if (arr.is_na(0)) {
57            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : is_na(0) should be false" << std::endl;
58            throw std::runtime_error("pd_test_boolean_array_na_handling failed: is_na(0) should be false");
59        }
60
61        if (!arr.has_na()) {
62            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : has_na() should be true" << std::endl;
63            throw std::runtime_error("pd_test_boolean_array_na_handling failed: has_na() should be true");
64        }
65
66        if (arr.count() != 2) {
67            std::cout << "  [FAIL] : in pd_test_boolean_array_na_handling() : count() should be 2" << std::endl;
68            throw std::runtime_error("pd_test_boolean_array_na_handling failed: count() should be 2");
69        }
70
71        std::cout << " -> tests passed" << std::endl;
72    }
73
74    void pd_test_boolean_array_kleene_and() {
75        std::cout << "========= BooleanArray: Kleene AND ======================= ";
cummax (pd_test_1_all.cpp:5152)
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
5154            passed = std::abs(std::stod(val) - 3.0) < 0.001;
5155            if (!passed) {
5156                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummax failed" << std::endl;
5157                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummax failed");
5158            }
5159
5160            std::cout << " -> tests passed" << std::endl;
5161        }
cummin (pd_test_1_all.cpp:5143)
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
5145            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5146            if (!passed) {
5147                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cummin failed" << std::endl;
5148                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cummin failed");
5149            }
5150
5151            // cummax: [1, 2, 3, 4]
5152            auto cmax = df.cummax();
5153            val = cmax["A"].get_value_str(2);
cumprod (pd_test_1_all.cpp:5134)
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
5136            passed = std::abs(std::stod(val) - 24.0) < 0.001;
5137            if (!passed) {
5138                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumprod failed" << std::endl;
5139                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumprod failed");
5140            }
5141
5142            // cummin: [1, 1, 1, 1]
5143            auto cmin = df.cummin();
5144            val = cmin["A"].get_value_str(3);
cumsum (pd_test_1_all.cpp:5125)
5115        }
5116
5117        void pd_test_arithmetic_dataframe_cumulative() {
5118            std::cout << "========= DataFrame cumulative ==================";
5119
5120            std::map<std::string, std::vector<double>> data;
5121            data["A"] = {1.0, 2.0, 3.0, 4.0};
5122            pandas::DataFrame df(data);
5123
5124            // cumsum: [1, 3, 6, 10]
5125            auto cs = df.cumsum();
5126            std::string val = cs["A"].get_value_str(2);
5127            bool passed = std::abs(std::stod(val) - 6.0) < 0.001;
5128            if (!passed) {
5129                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_cumulative() : cumsum failed" << std::endl;
5130                throw std::runtime_error("pd_test_arithmetic_dataframe_cumulative failed: cumsum failed");
5131            }
5132
5133            // cumprod: [1, 2, 6, 24]
5134            auto cp = df.cumprod();
5135            val = cp["A"].get_value_str(3);
describe (pd_test_2_all.cpp:19793)
19783        ++g_fail;
19784    }
19785}
19786
19787static bool approx_eq(double a, double b, double tol = 1e-9) {
19788    if (std::isnan(a) && std::isnan(b)) return true;
19789    return std::abs(a - b) < tol;
19790}
19791
19792// =====================================================================
19793// Test: describe() default mode — numeric columns only
19794// =====================================================================
19795
19796void pd_test_describe_numeric_only() {
19797    std::cout << "  -- pd_test_describe_numeric_only --" << std::endl;
19798
19799    pandas::DataFrame df;
19800    df.add_column("A", std::vector<double>{1.0, 2.0, 3.0, 4.0, 5.0});
19801    df.add_column("B", std::vector<double>{10.0, 20.0, 30.0, 40.0, 50.0});
19802    df.add_column("Name", std::vector<std::string>{"a", "b", "c", "d", "e"});
describe_as_series (pd_test_3_all.cpp:25806)
25796    if (idx2 != "0.75") throw std::runtime_error("Expected index '0.75', got '" + idx2 + "'");
25797    // Check values (quantile of 1,2,3,4,5: q=0.5 should be 3.0)
25798    double v1 = result[1];
25799    if (std::abs(v1 - 3.0) > 1e-10) throw std::runtime_error("Expected median 3.0, got " + std::to_string(v1));
25800    std::cout << "PASSED" << std::endl;
25801}
25802
25803void pd_test_series_format_helpers_describe() {
25804    std::cout << "  describe_as_series... ";
25805    ::pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0, 5.0}, "test");
25806    auto result = s.describe_as_series();
25807    // Should have 8 rows: count, mean, std, min, 25%, 50%, 75%, max
25808    if (result.size() != 8) throw std::runtime_error("Expected 8 rows, got " + std::to_string(result.size()));
25809    // Check count = 5.0
25810    double count_val = result[0];
25811    if (std::abs(count_val - 5.0) > 1e-10) throw std::runtime_error("Expected count=5.0, got " + std::to_string(count_val));
25812    // Check index[0] = "count"
25813    std::string idx0 = result.index().get_value_str(0);
25814    if (idx0 != "count") throw std::runtime_error("Expected index[0]='count', got '" + idx0 + "'");
25815    std::cout << "PASSED" << std::endl;
25816}
kurt (pd_test_1_all.cpp:4599)
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604            }
4605
4606            // Test kurtosis alias
4607            auto kurt_alias = s.kurtosis();
4608            passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609            if (!passed) {
kurtosis (pd_test_1_all.cpp:4607)
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
4603                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurt should have value");
4604            }
4605
4606            // Test kurtosis alias
4607            auto kurt_alias = s.kurtosis();
4608            passed = kurt_alias.has_value() && std::abs(*kurt_alias - *kurt_val) < 0.0001;
4609            if (!passed) {
4610                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurtosis alias failed" << std::endl;
4611                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612            }
4613
4614            std::cout << " -> tests passed" << std::endl;
4615        }
4616
4617        void pd_test_aggregation_series_pct_change() {
max (pd_test_1_all.cpp:771)
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
mean (pd_test_1_all.cpp:282)
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
median (pd_test_1_all.cpp:20910)
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
20901            }
20902
20903            std::cout << " -> tests passed" << std::endl;
20904        }
20905
20906        void pd_test_expanding_median() {
20907            std::cout << "========= Expanding median ======================";
20908
20909            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20910            auto result = s.expanding().median();
20911
20912            // Expanding median: 1, 1.5, 2, 2.5, 3
20913            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20914                          std::abs(result[1] - 1.5) < 0.001 &&
20915                          std::abs(result[2] - 2.0) < 0.001 &&
20916                          std::abs(result[3] - 2.5) < 0.001 &&
20917                          std::abs(result[4] - 3.0) < 0.001;
20918            if (!passed) {
20919                std::cout << "  [FAIL] : in pd_test_expanding_median() : expanding median values incorrect" << std::endl;
20920                throw std::runtime_error("pd_test_expanding_median failed: expanding median values incorrect");
min (pd_test_1_all.cpp:764)
754    }
755
756    void pd_test_categorical_array_ordered_operations() {
757        std::cout << "========= CategoricalArray: ordered operations (min/max) ======================= ";
758
759        std::vector<std::string> cats = {"low", "medium", "high"};
760        std::vector<numpy::int32> codes = {0, 2, 1, 0, -1};  // low, high, medium, low, NA
761        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats, true);  // ordered
762
763        // Test min
764        std::optional<std::string> min_val = arr.min();
765        if (!min_val.has_value() || *min_val != "low") {
766            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : min != 'low'" << std::endl;
767            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: min != 'low'");
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
mode (pd_test_1_all.cpp:4569)
4559                throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(1) should be 5.0");
4560            }
4561
4562            std::cout << " -> tests passed" << std::endl;
4563        }
4564
4565        void pd_test_aggregation_series_mode() {
4566            std::cout << "========= Series mode ===========================";
4567
4568            pandas::Series<int> s({1, 2, 2, 3, 3, 3});
4569            auto m = s.mode();
4570            bool passed = m.size() == 1 && m[0] == 3;
4571            if (!passed) {
4572                std::cout << "  [FAIL] : in pd_test_aggregation_series_mode() : mode should be 3" << std::endl;
4573                throw std::runtime_error("pd_test_aggregation_series_mode failed: mode should be 3");
4574            }
4575
4576            // Test multi-mode
4577            pandas::Series<int> s2({1, 1, 2, 2});
4578            auto m2 = s2.mode();
4579            passed = m2.size() == 2;  // Both 1 and 2 are modes
nunique (pd_test_1_all.cpp:10603)
10594    std::cout << " -> tests passed" << std::endl;
10595}
10596
10597void pd_test_extension_index_nunique() {
10598    std::cout << "========= nunique =========================";
10599
10600    pandas::CategoricalArray arr({"a", "b", "a", "c", "b", std::nullopt});
10601    pandas::CategoricalIndex idx(arr);
10602
10603    bool passed = (idx.nunique(true) == 3 && idx.nunique(false) == 4);
10604    if (!passed) {
10605        std::cout << "  [FAIL] : in pd_test_extension_index_nunique() : nunique check failed" << std::endl;
10606        throw std::runtime_error("pd_test_extension_index_nunique failed");
10607    }
10608
10609    std::cout << " -> tests passed" << std::endl;
10610}
10611
10612void pd_test_extension_index_factorize() {
10613    std::cout << "========= factorize =========================";
prod (pd_test_1_all.cpp:26082)
26072        std::cout << "====================================== [OK] pd_test_pivot_table test suite ========================== " << std::endl;
26073        return 0;
26074    }
26075
26076} // namespace dataframe_tests
26077// ------------------- pd_test_pivot_table.cpp (end) -----------------------------
26078
26079// ------------------- pd_test_prod.cpp (start) -----------------------------
26080// dataframe_tests/pd_test_prod.cpp
26081// Tests for DataFrame.prod() and DataFrame.prod_cols() methods
26082
26083#include <iostream>
26084#include <stdexcept>
26085#include <cmath>
26086#include <limits>
26087#include "../pandas/pd_dataframe.h"
26088
26089// CRITICAL: No using namespace directives
26090
26091namespace dataframe_tests {
product (pd_test_3_all.cpp:2584)
2574    // Test quantile along rows
2575    pandas::Series<numpy::float64> q50_rows = df.quantile(0.5, 1);
2576    if (q50_rows.size() != 5) {
2577        throw std::runtime_error("quantile(0.5, axis=1) failed");
2578    }
2579
2580    std::cout << " -> tests passed" << std::endl;
2581}
2582
2583void pd_test_3_all_df_product() {
2584    std::cout << "========= DataFrame.product(axis) ========================";
2585
2586    std::map<std::string, std::vector<double>> data = {
2587        {"A", {1.0, 2.0, 3.0}},
2588        {"B", {4.0, 5.0, 6.0}}
2589    };
2590    pandas::DataFrame df(data);
2591
2592    // Test product along columns
2593    pandas::Series<numpy::float64> prod_cols = df.product(0);
2594    if (prod_cols.size() != 2 || std::abs(prod_cols[static_cast<size_t>(0)] - 6.0) > 0.001 ||
quantile (pd_test_1_all.cpp:4540)
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
4535
4536        void pd_test_aggregation_series_quantile() {
4537            std::cout << "========= Series quantile =======================";
4538
4539            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4540            auto q50 = s.quantile(0.5);
4541            bool passed = q50.has_value() && std::abs(*q50 - 3.0) < 0.001;
4542            if (!passed) {
4543                std::cout << "  [FAIL] : in pd_test_aggregation_series_quantile() : quantile(0.5) should be 3.0" << std::endl;
4544                throw std::runtime_error("pd_test_aggregation_series_quantile failed: quantile(0.5) should be 3.0");
4545            }
4546
4547            // Test q=0 and q=1
4548            auto q0 = s.quantile(0.0);
4549            passed = q0.has_value() && std::abs(*q0 - 1.0) < 0.001;
4550            if (!passed) {
quantile_list (pd_test_3_all.cpp:25788)
25778    std::string r2 = ::pandas::display::format_quantile_label(0.5);
25779    if (r2 != "0.5") throw std::runtime_error("Expected '0.5', got '" + r2 + "'");
25780    std::string r3 = ::pandas::display::format_quantile_label(0.75);
25781    if (r3 != "0.75") throw std::runtime_error("Expected '0.75', got '" + r3 + "'");
25782    std::cout << "PASSED" << std::endl;
25783}
25784
25785void pd_test_series_format_helpers_quantile_list() {
25786    std::cout << "  quantile_list... ";
25787    ::pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0, 5.0}, "test");
25788    auto result = s.quantile_list({0.25, 0.5, 0.75});
25789    if (result.size() != 3) throw std::runtime_error("Expected 3 values, got " + std::to_string(result.size()));
25790    // Check index labels
25791    std::string idx0 = result.index().get_value_str(0);
25792    std::string idx1 = result.index().get_value_str(1);
25793    std::string idx2 = result.index().get_value_str(2);
25794    if (idx0 != "0.25") throw std::runtime_error("Expected index '0.25', got '" + idx0 + "'");
25795    if (idx1 != "0.5") throw std::runtime_error("Expected index '0.5', got '" + idx1 + "'");
25796    if (idx2 != "0.75") throw std::runtime_error("Expected index '0.75', got '" + idx2 + "'");
25797    // Check values (quantile of 1,2,3,4,5: q=0.5 should be 3.0)
25798    double v1 = result[1];
sem (pd_test_1_all.cpp:4525)
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
skew (pd_test_1_all.cpp:4592)
4582                throw std::runtime_error("pd_test_aggregation_series_mode failed: multi-mode should return 2 values");
4583            }
4584
4585            std::cout << " -> tests passed" << std::endl;
4586        }
4587
4588        void pd_test_aggregation_series_skew_kurt() {
4589            std::cout << "========= Series skew/kurt ======================";
4590
4591            pandas::Series<double> s({1.0, 2.0, 2.0, 3.0, 9.0});
4592            auto skew_val = s.skew();
4593            bool passed = skew_val.has_value() && *skew_val > 0;  // Should be right-skewed
4594            if (!passed) {
4595                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : skew should be positive" << std::endl;
4596                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: skew should be positive");
4597            }
4598
4599            auto kurt_val = s.kurt();
4600            passed = kurt_val.has_value();
4601            if (!passed) {
4602                std::cout << "  [FAIL] : in pd_test_aggregation_series_skew_kurt() : kurt should have value" << std::endl;
std_ (pd_test_1_all.cpp:20752)
20742                throw std::runtime_error("pd_test_rolling_min_periods failed: with min_periods=1, idx 1 should be 3.0");
20743            }
20744
20745            std::cout << " -> tests passed" << std::endl;
20746        }
20747
20748        void pd_test_rolling_std() {
20749            std::cout << "========= Rolling std ===========================";
20750
20751            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20752            auto result = s.rolling(3).std_();
20753
20754            // std([1,2,3]) = 1.0 (ddof=1)
20755            // std([2,3,4]) = 1.0
20756            // std([3,4,5]) = 1.0
20757            bool passed = std::abs(result[2] - 1.0) < 0.001;
20758            if (!passed) {
20759                std::cout << "  [FAIL] : in pd_test_rolling_std() : rolling std should be 1.0" << std::endl;
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
sum (pd_test_1_all.cpp:276)
266        }
267
268        // Test sum/mean
269        pandas::BooleanArray arr({
270            std::optional<bool>(true),
271            std::optional<bool>(false),
272            std::optional<bool>(true),
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
value_counts (pd_test_1_all.cpp:865)
855        std::vector<std::optional<std::string>> values = {
856            std::optional<std::string>("a"),
857            std::optional<std::string>("b"),
858            std::optional<std::string>("a"),
859            std::optional<std::string>("a"),
860            std::optional<std::string>("b"),
861            std::nullopt  // NA not counted
862        };
863        pandas::CategoricalArray arr(values);
864
865        auto [cats, counts] = arr.value_counts();
866
867        // Should have 2 categories
868        if (cats.size() != 2 || counts.size() != 2) {
869            std::cout << "  [FAIL] : in pd_test_categorical_array_value_counts() : wrong size" << std::endl;
870            throw std::runtime_error("pd_test_categorical_array_value_counts failed: wrong size");
871        }
872
873        // Find 'a' count
874        int64_t a_count = 0, b_count = 0;
875        for (size_t i = 0; i < cats.size(); ++i) {
var (pd_test_1_all.cpp:20890)
20880                throw std::runtime_error("pd_test_expanding_std failed: expanding std values incorrect");
20881            }
20882
20883            std::cout << " -> tests passed" << std::endl;
20884        }
20885
20886        void pd_test_expanding_var() {
20887            std::cout << "========= Expanding var =========================";
20888
20889            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20890            auto result = s.expanding().var();
20891
20892            // Expanding var (ddof=1): NaN, 0.5, 1.0, 1.6667, 2.5
20893            bool passed = std::isnan(result[0]) &&
20894                          std::abs(result[1] - 0.5) < 0.001 &&
20895                          std::abs(result[2] - 1.0) < 0.001 &&
20896                          std::abs(result[3] - 1.6667) < 0.001 &&
20897                          std::abs(result[4] - 2.5) < 0.001;
20898            if (!passed) {
20899                std::cout << "  [FAIL] : in pd_test_expanding_var() : expanding var values incorrect" << std::endl;
20900                throw std::runtime_error("pd_test_expanding_var failed: expanding var values incorrect");
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg (pd_test_1_all.cpp:11100)
11090        }
11091
11092        void pd_test_func_apply_series_agg() {
11093            std::cout << "========= Series agg ==================================";
11094
11095            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0}, "values");
11096
11097            bool passed = true;
11098
11099            // Test string-based aggregation
11100            auto sum_result = s.agg("sum");
11101            if (!sum_result.has_value() || !approx_equal(sum_result.value(), 15.0)) {
11102                passed = false;
11103                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : sum failed" << std::endl;
11104                throw std::runtime_error("pd_test_func_apply_series_agg failed: sum failed");
11105            }
11106
11107            auto mean_result = s.agg("mean");
11108            if (!mean_result.has_value() || !approx_equal(mean_result.value(), 3.0)) {
11109                passed = false;
11110                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : mean failed" << std::endl;
agg_with_dtype (pd_test_5_all.cpp:94652)
94642static void run_dfgb_case(const std::string& fn,
94643                          const std::string& col,
94644                          const std::string& expected_dtype,
94645                          const std::string& label,
94646                          int& local_fail) {
94647    pandas::DataFrame df = make_mixed_df();
94648    auto gb = df.groupby("key");
94649    pandas::DataFrame out;
94650    std::string err;
94651    try {
94652        out = gb.agg_with_dtype(fn);
94653    } catch (const std::exception& e) {
94654        err = e.what();
94655    } catch (...) {
94656        err = "<unknown>";
94657    }
94658    pandas_tests::check(err.empty(),
94659        label + "_no_throw",
94660        local_fail);
94661    if (!err.empty()) {
94662        std::cout << "  err: " << err << "\n";
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
aggregate (pd_test_1_all.cpp:11139)
11129            auto custom_agg = s.agg([](const std::vector<double>& v) {
11130                return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
11131            });
11132            if (!approx_equal(custom_agg, 3.0)) {
11133                passed = false;
11134                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : custom agg failed" << std::endl;
11135                throw std::runtime_error("pd_test_func_apply_series_agg failed: custom agg failed");
11136            }
11137
11138            // Test aggregate alias
11139            auto alias_result = s.aggregate("sum");
11140            if (!alias_result.has_value() || !approx_equal(alias_result.value(), 15.0)) {
11141                passed = false;
11142                std::cout << "  [FAIL] : in pd_test_func_apply_series_agg() : aggregate alias failed" << std::endl;
11143                throw std::runtime_error("pd_test_func_apply_series_agg failed: aggregate alias failed");
11144            }
11145
11146            std::cout << " -> tests passed" << std::endl;
11147        }
11148
11149        void pd_test_func_apply_series_pipe() {
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply (pd_test_1_all.cpp:11244)
11234        void pd_test_func_apply_dataframe_apply_axis0() {
11235            std::cout << "========= DataFrame apply axis=0 ======================";
11236
11237            std::map<std::string, std::vector<double>> data = {
11238                {"A", {1.0, 2.0, 3.0}},
11239                {"B", {4.0, 5.0, 6.0}}
11240            };
11241            pandas::DataFrame df(data);
11242
11243            // apply axis=0 applies function to each column
11244            auto result = df.apply([](const std::vector<double>& col) {
11245                return std::accumulate(col.begin(), col.end(), 0.0);
11246            }, 0);
11247
11248            bool passed = true;
11249
11250            // Plan F·dtype: axis=0 reduce now returns a single "result" column
11251            // with the original column names ("A", "B") as the row index.
11252            // Sum of A: 1+2+3=6, Sum of B: 4+5+6=15
11253            const auto& result_col = result["result"];
11254            double sum_a = std::stod(result_col.get_value_str(0));
apply_dispatch (pd_test_5_all.cpp:53781)
53771                  << "  actual dtype:   [" << dts[0] << "]\n";
53772    }
53773}
53774
53775static void f_series_apply_dispatch_502719_case_H1_dispatch_callable_square_int(int& local_fail) {
53776    std::cout << "-- case_H1_dispatch_callable_square_int\n";
53777    pandas::Series<std::int64_t> s({2, 3, 4}, "v");
53778    auto h = dispatch_test_helpers::make_stub_callable(
53779        dispatch_test_helpers::test_cell_int_cb([](double x) { return static_cast<std::int64_t>(x * x); }),
53780        "square");
53781    pandas::Result r = s.apply_dispatch(pandas::FuncArg::from_callable_handle(h));
53782    bool got = std::holds_alternative<std::unique_ptr<pandas::Series<numpy::float64>>>(r.value);
53783    pandas_tests::check(got, "case_H1.is_series_float64", local_fail);
53784    if (!got) return;
53785    auto& sp = std::get<std::unique_ptr<pandas::Series<numpy::float64>>>(r.value);
53786    bool ok = (sp->size() == 3) && (*sp)[0] == 4.0 && (*sp)[1] == 9.0 && (*sp)[2] == 16.0;
53787    pandas_tests::check(ok, "case_H1.values_squared", local_fail);
53788}
53789
53790static void f_series_apply_dispatch_502719_case_H2_dispatch_callable_half_float(int& local_fail) {
53791    std::cout << "-- case_H2_dispatch_callable_half_float\n";
apply_ns_transform (pd_test_4_all.cpp:6365)
6355    auto neg = me.negate();
6356    auto b = s.add_dateoffset(*neg);
6357    EXPECT(a.size() == b.size());
6358    for (size_t i = 0; i < a.size(); ++i) {
6359        EXPECT(static_cast<int64_t>(a[i]) == static_cast<int64_t>(b[i]));
6360    }
6361}
6362
6363void test_apply_ns_transform_identity() {
6364    auto s = make_dt_series({1, 2, 3});
6365    auto out = s.apply_ns_transform([](int64_t x) { return x; }, "datetime64[ns]");
6366    EXPECT(out.size() == 3);
6367    for (size_t i = 0; i < 3; ++i) {
6368        EXPECT(static_cast<int64_t>(out[i]) == static_cast<int64_t>(s[i]));
6369    }
6370    EXPECT(out.dtype_name() == "datetime64[ns]");
6371}
6372
6373void test_tz_aware_calendar_preserves_override() {
6374    auto s = make_dt_series({0});
6375    s.set_dtype_override("datetime64[ns, UTC]");
apply_resolved_typed (pd_test_5_all.cpp:98141)
98131    switch (cid) {
98132        case CbId::Int:   cb = cb_int(hist);    break;
98133        case CbId::Bool:  cb = cb_bool(hist);   break;
98134        case CbId::Float: cb = cb_float(hist);  break;
98135        case CbId::Str:   cb = cb_string(hist); break;
98136        case CbId::Mixed: cb = cb_mixed(hist);  break;
98137    }
98138
98139    pandas::Result r;
98140    try {
98141        r = s.apply_resolved_typed(cb, hist);
98142    } catch (const std::exception& e) {
98143        std::string tag = std::string("apply src=") + src_name(sid) +
98144                          " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98145        std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98146                  << tag << " unexpected exception: " << e.what() << "\n";
98147        ++pandas_tests::g_failed; ++local_fail;
98148        ++pandas_tests::g_failed; ++local_fail;
98149        ++pandas_tests::g_failed; ++local_fail;
98150        return;
98151    }
apply_with_args (pd_test_3_all.cpp:16993)
16983        }
16984    }
16985
16986    if (!passed) {
16987        throw std::runtime_error("pd_test_apply_axis1_broadcast failed");
16988    }
16989    std::cout << " -> tests passed" << std::endl;
16990}
16991
16992void pd_test_apply_with_args() {
16993    std::cout << "========= DataFrame.apply_with_args() =================";
16994
16995    std::map<std::string, std::vector<double>> data = {
16996        {"A", {1.0, 2.0, 3.0}},
16997        {"B", {4.0, 5.0, 6.0}}
16998    };
16999    pandas::DataFrame df(data);
17000
17001    // Apply with additional argument: multiply sum by factor
17002    auto result = df.apply_with_args(
17003        [](const std::vector<double>& col, double factor) {
ewm (pd_test_3_all.cpp:2961)
2951    // Test expanding sum
2952    pandas::DataFrame expanding_sum = df.expanding().sum();
2953    if (expanding_sum.nrows() != 5 || expanding_sum.ncols() != 2) {
2954        throw std::runtime_error("expanding().sum() shape failed");
2955    }
2956
2957    std::cout << " -> tests passed" << std::endl;
2958}
2959
2960void pd_test_3_all_df_ewm() {
2961    std::cout << "========= DataFrame.ewm() ================================";
2962
2963    std::map<std::string, std::vector<double>> data = {
2964        {"A", {1.0, 2.0, 3.0, 4.0, 5.0}},
2965        {"B", {10.0, 20.0, 30.0, 40.0, 50.0}}
2966    };
2967    pandas::DataFrame df(data);
2968
2969    // Test ewm mean with span=3
2970    pandas::DataFrame ewm_mean = df.ewm(std::nullopt, 3.0).mean();
2971    if (ewm_mean.nrows() != 5 || ewm_mean.ncols() != 2) {
ewm_full (pd_test_3_all.cpp:9903)
9894    // Test with span using legacy overload
9895    auto ewm1 = s.ewm(3.0);
9896    auto result1 = ewm1.mean();
9897    if (result1.size() != 5) {
9898        std::cout << "  [FAIL] : ewm with span returned wrong size" << std::endl;
9899        throw std::runtime_error("pd_test_3_all_phase2_ewm_params failed");
9900    }
9901
9902    // Test with alpha using ewm_full
9903    auto ewm2 = s.ewm_full(std::nullopt, std::nullopt, std::nullopt, 0.5);
9904    auto result2 = ewm2.mean();
9905    if (result2.size() != 5) {
9906        std::cout << "  [FAIL] : ewm with alpha returned wrong size" << std::endl;
9907        throw std::runtime_error("pd_test_3_all_phase2_ewm_params failed");
9908    }
9909
9910    std::cout << " -> tests passed" << std::endl;
9911}
9912
9913void pd_test_3_all_phase2_combine_params() {
ewm_span (pd_test_1_all.cpp:21167)
21158            std::cout << " -> tests passed" << std::endl;
21159        }
21160
21161        void pd_test_ewm_span() {
21162            std::cout << "========= EWM span ==============================";
21163
21164            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
21165
21166            // EWM with span=3 => alpha = 2/(3+1) = 0.5
21167            auto result = s.ewm_span(3.0).mean();
21168
21169            // Check that result has correct size
21170            bool passed = result.size() == 5;
21171            if (!passed) {
21172                std::cout << "  [FAIL] : in pd_test_ewm_span() : result size should be 5" << std::endl;
21173                throw std::runtime_error("pd_test_ewm_span failed: result size should be 5");
21174            }
21175
21176            // First value should be equal to original (no weighting yet)
21177            passed = std::abs(result[0] - 1.0) < 0.001;
expanding (pd_test_1_all.cpp:20770)
20760                throw std::runtime_error("pd_test_rolling_std failed: rolling std should be 1.0");
20761            }
20762
20763            std::cout << " -> tests passed" << std::endl;
20764        }
20765
20766        void pd_test_expanding_sum() {
20767            std::cout << "========= Expanding sum =========================";
20768
20769            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20770            auto result = s.expanding().sum();
20771
20772            // Cumulative sum: 1, 3, 6, 10, 15
20773            bool passed = std::abs(result[0] - 1.0) < 0.001 &&
20774                          std::abs(result[1] - 3.0) < 0.001 &&
20775                          std::abs(result[2] - 6.0) < 0.001 &&
20776                          std::abs(result[3] - 10.0) < 0.001 &&
20777                          std::abs(result[4] - 15.0) < 0.001;
20778            if (!passed) {
20779                std::cout << "  [FAIL] : in pd_test_expanding_sum() : expanding sum values incorrect" << std::endl;
20780                throw std::runtime_error("pd_test_expanding_sum failed: expanding sum values incorrect");
groupby (pd_test_1_all.cpp:11495)
11485            std::cout << "========= GroupBy basic =========================";
11486
11487            // Create DataFrame with category column
11488            std::map<std::string, std::vector<double>> data = {
11489                {"category", {1.0, 1.0, 2.0, 2.0, 2.0}},
11490                {"value", {10.0, 20.0, 30.0, 40.0, 50.0}}
11491            };
11492            pandas::DataFrame df(data);
11493
11494            // Test groupby
11495            auto grouped = df.groupby("category");
11496
11497            bool passed = grouped.ngroups() == 2;
11498            if (!passed) {
11499                std::cout << "  [FAIL] : in pd_test_groupby_basic() : ngroups should be 2" << std::endl;
11500                throw std::runtime_error("pd_test_groupby_basic failed: ngroups should be 2");
11501            }
11502
11503            std::cout << " -> tests passed" << std::endl;
11504        }
groupby_by_callable (pd_test_4_all.cpp:6412)
6402    EXPECT(static_cast<int64_t>(out[1]) - (a + add) <= 2);
6403}
6404
6405void test_groupby_by_callable_int_index() {
6406    std::vector<numpy::float64> v = {1.0, 2.0, 3.0, 4.0};
6407    pandas::Series<numpy::float64> s(v);
6408    auto convert = [](size_t /*i*/, const std::string& label, bool /*hint*/) -> std::string {
6409        int64_t k = std::stoll(label);
6410        return std::to_string(k % 2);
6411    };
6412    auto gb = s.groupby_by_callable(convert, true);
6413    EXPECT(gb.ngroups() == 2);
6414}
6415
6416void test_groupby_by_callable_empty() {
6417    pandas::Series<numpy::float64> s(std::vector<numpy::float64>{});
6418    int calls = 0;
6419    auto convert = [&](size_t, const std::string&, bool) -> std::string { ++calls; return ""; };
6420    auto gb = s.groupby_by_callable(convert, true);
6421    EXPECT(calls == 0);
6422    EXPECT(gb.ngroups() == 0);
groupby_by_categorical (pd_test_3_all.cpp:23500)
23490    auto sums = gb.sum();
23491    if (sums[0] != 40.0 || sums[1] != 60.0)
23492        throw std::runtime_error("sum mismatch");
23493    if (gb.grouper_dtype() != "float64")
23494        throw std::runtime_error("grouper_dtype mismatch");
23495
23496    std::cout << " -> tests passed" << std::endl;
23497}
23498
23499void pd_test_groupby_by_categorical() {
23500    std::cout << "========= groupby_by_categorical() ====================";
23501
23502    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23503    pandas::CategoricalArray cat({"a", "b", "a"}, {"a", "b", "c"});
23504
23505    auto gb_obs = s.groupby_by_categorical(cat, true, true);
23506    if (gb_obs.group_keys_order().size() != 2)
23507        throw std::runtime_error("expected 2 observed groups");
23508
23509    auto gb_all = s.groupby_by_categorical(cat, true, false);
23510    if (gb_all.group_keys_order().size() != 3)
groupby_by_index (pd_test_3_all.cpp:23426)
23416    auto gb = s.groupby_by_level(levels, true);
23417    if (gb.group_keys_order().size() != 4)
23418        throw std::runtime_error("expected 4 composite groups");
23419    if (gb.multiindex_names().size() != 2 || gb.multiindex_names()[0] != "L0" || gb.multiindex_names()[1] != "L1")
23420        throw std::runtime_error("multiindex names mismatch");
23421
23422    std::cout << " -> tests passed" << std::endl;
23423}
23424
23425void pd_test_groupby_by_index() {
23426    std::cout << "========= groupby_by_index() ==========================";
23427
23428    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0});
23429    s.set_index(pandas::Index<std::string>({"a", "b", "a"}));
23430    auto gb = s.groupby_by_index(true, true);
23431    if (gb.group_keys_order().size() != 2)
23432        throw std::runtime_error("expected 2 groups");
23433    auto sums = gb.sum();
23434    if (sums[0] != 40.0 || sums[1] != 20.0)
23435        throw std::runtime_error("sum mismatch");
groupby_by_labels (pd_test_3_all.cpp:23520)
23510    if (gb_all.group_keys_order().size() != 3)
23511        throw std::runtime_error("expected 3 groups with observed=false");
23512
23513    if (gb_obs.categorical_categories().size() != 3)
23514        throw std::runtime_error("categorical_categories not set");
23515
23516    std::cout << " -> tests passed" << std::endl;
23517}
23518
23519void pd_test_groupby_by_labels() {
23520    std::cout << "========= groupby_by_labels() =========================";
23521
23522    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23523    std::vector<std::string> labels = {"X", "Y", "X", "Y"};
23524    auto gb = s.groupby_by_labels(labels, "object", true);
23525    auto sums = gb.sum();
23526    if (sums[0] != 4.0 || sums[1] != 6.0)
23527        throw std::runtime_error("sum mismatch");
23528    if (gb.grouper_dtype() != "object")
23529        throw std::runtime_error("grouper_dtype mismatch");
groupby_by_level (pd_test_3_all.cpp:23382)
23373int pd_test_df_construct_mi_main() {
23374    return dataframe_tests_df_construct_mi::pd_test_df_construct_mi_main();
23375}
23376// ------------------- pd_test_df_construct_mi (end) ---------------------------
23377
23378// ------------------- pd_test_groupby_level_dispatch.cpp (start) ---------------------------
23379namespace dataframe_tests_groupby_level_dispatch {
23380
23381void pd_test_groupby_level_single() {
23382    std::cout << "========= groupby_by_level(single) ====================";
23383
23384    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23385    std::vector<std::vector<std::string>> level_values = {
23386        {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23387    };
23388    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23389    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23390    s.set_multiindex(mi);
23391
23392    auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
groupby_by_level (pd_test_3_all.cpp:23382)
23373int pd_test_df_construct_mi_main() {
23374    return dataframe_tests_df_construct_mi::pd_test_df_construct_mi_main();
23375}
23376// ------------------- pd_test_df_construct_mi (end) ---------------------------
23377
23378// ------------------- pd_test_groupby_level_dispatch.cpp (start) ---------------------------
23379namespace dataframe_tests_groupby_level_dispatch {
23380
23381void pd_test_groupby_level_single() {
23382    std::cout << "========= groupby_by_level(single) ====================";
23383
23384    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23385    std::vector<std::vector<std::string>> level_values = {
23386        {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23387    };
23388    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23389    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23390    s.set_multiindex(mi);
23391
23392    auto gb = s.groupby_by_level(static_cast<size_t>(0), true);
groupby_by_level_names (pd_test_3_all.cpp:23548)
23538    pandas::Series<std::string> by_s({"cat", "dog", "cat"});
23539    auto gb = s.groupby_by_string_series(by_s, true);
23540    auto sums = gb.sum();
23541    if (sums[0] != 400.0 || sums[1] != 200.0)
23542        throw std::runtime_error("sum mismatch");
23543
23544    std::cout << " -> tests passed" << std::endl;
23545}
23546
23547void pd_test_groupby_by_level_names() {
23548    std::cout << "========= groupby_by_level_names() ====================";
23549
23550    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0});
23551    std::vector<std::vector<std::string>> level_values = {
23552        {"a", "a", "b", "b"}, {"x", "y", "x", "y"}
23553    };
23554    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23555    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23556    s.set_multiindex(mi);
23557
23558    std::vector<std::string> by_names = {"first", "second"};
groupby_by_numeric (pd_test_3_all.cpp:23483)
23474int pd_test_groupby_level_dispatch_main() {
23475    return dataframe_tests_groupby_level_dispatch::pd_test_groupby_level_dispatch_main();
23476}
23477// ------------------- pd_test_groupby_level_dispatch.cpp (end) ---------------------------
23478
23479// ------------------- pd_test_groupby_by_dispatch.cpp (start) ---------------------------
23480namespace dataframe_tests_groupby_by_dispatch {
23481
23482void pd_test_groupby_by_numeric() {
23483    std::cout << "========= groupby_by_numeric() ========================";
23484
23485    pandas::Series<numpy::float64> s({10.0, 20.0, 30.0, 40.0});
23486    pandas::Series<numpy::float64> by_s({1.0, 2.0, 1.0, 2.0});
23487    auto gb = s.groupby_by_numeric(by_s, true);
23488    if (gb.group_keys_order().size() != 2)
23489        throw std::runtime_error("expected 2 groups");
23490    auto sums = gb.sum();
23491    if (sums[0] != 40.0 || sums[1] != 60.0)
23492        throw std::runtime_error("sum mismatch");
23493    if (gb.grouper_dtype() != "float64")
groupby_by_string_series (pd_test_3_all.cpp:23535)
23525    auto sums = gb.sum();
23526    if (sums[0] != 4.0 || sums[1] != 6.0)
23527        throw std::runtime_error("sum mismatch");
23528    if (gb.grouper_dtype() != "object")
23529        throw std::runtime_error("grouper_dtype mismatch");
23530
23531    std::cout << " -> tests passed" << std::endl;
23532}
23533
23534void pd_test_groupby_by_string_series() {
23535    std::cout << "========= groupby_by_string_series() ==================";
23536
23537    pandas::Series<numpy::float64> s({100.0, 200.0, 300.0});
23538    pandas::Series<std::string> by_s({"cat", "dog", "cat"});
23539    auto gb = s.groupby_by_string_series(by_s, true);
23540    auto sums = gb.sum();
23541    if (sums[0] != 400.0 || sums[1] != 200.0)
23542        throw std::runtime_error("sum mismatch");
23543
23544    std::cout << " -> tests passed" << std::endl;
23545}
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
map (pd_test_1_all.cpp:5839)
5829// Map Tests
5830// ============================================================================
5831
5832void pd_test_categorical_index_map() {
5833    std::cout << "========= map =========================================";
5834
5835    pandas::CategoricalArray arr({"yes", "no", "yes"});
5836    pandas::CategoricalIndex idx(arr);
5837
5838    std::unordered_map<std::string, std::string> mapping = {{"yes", "1"}, {"no", "0"}};
5839    pandas::CategoricalIndex mapped = idx.map(mapping);
5840
5841    bool passed = (mapped.has_category("1") && mapped.has_category("0") &&
5842                   !mapped.has_category("yes") && !mapped.has_category("no"));
5843    if (!passed) {
5844        std::cout << "  [FAIL] : in pd_test_categorical_index_map()" << std::endl;
5845        throw std::runtime_error("pd_test_categorical_index_map failed");
5846    }
5847
5848    std::cout << " -> tests passed" << std::endl;
5849}
map_dict (pd_test_3_all.cpp:23599)
23589// ------------------- pd_test_groupby_by_dispatch.cpp (end) ---------------------------
23590
23591// ------------------- pd_test_map_dispatch.cpp (begin) ---------------------------
23592namespace dataframe_tests_map_dispatch {
23593
23594void pd_test_map_dispatch_dict_basic() {
23595    std::cout << "  pd_test_map_dispatch_dict_basic";
23596    // Dict mapping: 3 matched values + 1 unmapped -> NaN; name preserved
23597    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, 4.0}, "vals");
23598    std::map<numpy::float64, numpy::float64> mapping = {{1.0, 10.0}, {2.0, 20.0}, {3.0, 30.0}};
23599    auto res = s.map_dict(mapping);
23600    if (res.name() != "vals")
23601        throw std::runtime_error("name not preserved");
23602    if (res[0] != 10.0 || res[1] != 20.0 || res[2] != 30.0)
23603        throw std::runtime_error("mapped values incorrect");
23604    if (!std::isnan(res[3]))
23605        throw std::runtime_error("unmapped key should be NaN");
23606    std::cout << " -> tests passed" << std::endl;
23607}
23608
23609void pd_test_map_dispatch_preserves_index() {
map_dict_resolved (pd_test_5_all.cpp:139935)
139925    pandas::Series<numpy::float64> s(data, std::optional<std::string>{});
139926    s.set_index(std::make_unique<pandas::Index<std::string>>(labels));
139927    s.set_dtype_override("int64");
139928    return s;
139929}
139930
139931static void f_plan_02_apply_result_dtype_517043_map_full_int(int& lf) {
139932    std::cout << "-- E_map_full_int\n";
139933    auto s = mk_src_int64({1, 2, 3}, {"a", "b", "c"});
139934    std::map<int64_t, int64_t> m{{1, 10}, {2, 20}, {3, 30}};
139935    auto r = s.map_dict_resolved(m);
139936    pandas_tests::check(result_series_dtype(r) == "int64",
139937        "E_map_full_int()_dtype", lf);
139938}
139939
139940static void f_plan_02_apply_result_dtype_517043_map_partial_int(int& lf) {
139941    std::cout << "-- E_map_partial_int\n";
139942    auto s = mk_src_int64({1, 2, 3}, {"a", "b", "c"});
139943    std::map<int64_t, int64_t> m{{1, 10}};
139944    auto r = s.map_dict_resolved(m);
139945    pandas_tests::check(result_series_dtype(r) == "float64",
map_series (pd_test_3_all.cpp:23633)
23624void pd_test_map_dispatch_series_lookup() {
23625    std::cout << "  pd_test_map_dispatch_series_lookup";
23626    // Series lookup via index labels returns correct mapped values
23627    // lookup: index=[1,2,3], values=[10,20,30]
23628    pandas::Series<numpy::float64> lookup({10.0, 20.0, 30.0}, "lk");
23629    pandas::Index<std::string> lk_idx({"1", "2", "3"});
23630    lookup.set_index(lk_idx);
23631
23632    pandas::Series<numpy::float64> s({2.0, 3.0, 1.0}, "src");
23633    auto res = s.map_series(lookup);
23634    if (res[0] != 20.0 || res[1] != 30.0 || res[2] != 10.0)
23635        throw std::runtime_error("series lookup values incorrect");
23636    if (res.name() != "src")
23637        throw std::runtime_error("name not preserved");
23638    std::cout << " -> tests passed" << std::endl;
23639}
23640
23641void pd_test_map_dispatch_nan_passthrough() {
23642    std::cout << "  pd_test_map_dispatch_nan_passthrough";
23643    // NaN in source passes through as NaN, non-NaN values mapped
map_series_resolved (pd_test_5_all.cpp:143266)
143256    std::map<int64_t, int64_t> m{{1, 10}};
143257    auto r = s.map_dict_resolved(m);
143258    check_dtype_eq("apply_empty_hist_case_30_empty_map_nonempty_dict_int64()",
143259                   result_series_dtype_full(r), "int64", lf);
143260}
143261
143262void case_31_empty_map_nonempty_series_int64(int& lf) {
143263    std::cout << "-- case_31_empty_map_nonempty_series_int64\n";
143264    auto s = mk_f64({}, {}, "int64");
143265    auto mapper = mk_f64({100, 200}, {"1", "2"}, "int64");
143266    auto r = s.map_series_resolved(mapper);
143267    check_dtype_eq("apply_empty_hist_case_31_empty_map_nonempty_series_int64()",
143268                   result_series_dtype_full(r), "int64", lf);
143269}
143270
143271void case_40_E1_empty_map_empty_series_int64(int& lf) {
143272    std::cout << "-- case_40_E1_empty_map_empty_series_int64\n";
143273    auto s = mk_f64({}, {}, "int64");
143274    auto mapper = mk_f64({}, {}, "int64");
143275    auto r = s.map_series_resolved(mapper);
143276    check_dtype_eq("apply_empty_hist_case_40_E1_empty_map_empty_series_int64()",
map_to_string (pd_test_3_all.cpp:23669)
23660void pd_test_map_dispatch_to_string() {
23661    std::cout << "  pd_test_map_dispatch_to_string";
23662    // map_to_string returns string values for matched, "NaN" for unmatched
23663    pandas::Series<std::string> lookup(std::vector<std::string>{"one", "two"}, "lk");
23664    pandas::Index<std::string> lk_idx({"1", "2"});
23665    lookup.set_index(lk_idx);
23666
23667    double nan_val = std::numeric_limits<double>::quiet_NaN();
23668    pandas::Series<numpy::float64> s({1.0, 2.0, 3.0, nan_val}, "src");
23669    auto res = s.map_to_string(lookup);
23670    if (res[0] != "one" || res[1] != "two")
23671        throw std::runtime_error("matched values incorrect");
23672    if (res[2] != "NaN")
23673        throw std::runtime_error("unmatched should be NaN string");
23674    if (res[3] != "NaN")
23675        throw std::runtime_error("NaN source should be NaN string");
23676    if (res.name() != "src")
23677        throw std::runtime_error("name not preserved");
23678    std::cout << " -> tests passed" << std::endl;
23679}
pipe (pd_test_1_all.cpp:11164)
11154            // Pipe applies function to entire Series
11155            auto add_mean = [](const pandas::Series<double>& ser, double offset) {
11156                auto mean_val = ser.mean();
11157                std::vector<double> result;
11158                for (size_t i = 0; i < ser.size(); ++i) {
11159                    result.push_back(ser[i] + mean_val.value_or(0.0) + offset);
11160                }
11161                return pandas::Series<double>(result, ser.name());
11162            };
11163
11164            auto result = s.pipe(add_mean, 10.0);
11165
11166            bool passed = true;
11167            // mean is 2.5, offset is 10.0, so each value + 12.5
11168            std::vector<double> expected = {13.5, 14.5, 15.5, 16.5};
11169            for (size_t i = 0; i < result.size(); ++i) {
11170                if (!approx_equal(result[i], expected[i])) {
11171                    passed = false;
11172                    std::cout << "  [FAIL] : in pd_test_func_apply_series_pipe() : value mismatch at " << i << std::endl;
11173                    throw std::runtime_error("pd_test_func_apply_series_pipe failed: value mismatch");
11174                }
resample (pd_test_1_all.cpp:20321)
20311                "2020-01-01 00:00:00",
20312                "2020-01-01 12:00:00",
20313                "2020-01-02 00:00:00",
20314                "2020-01-02 12:00:00",
20315                "2020-01-03 00:00:00",
20316                "2020-01-03 12:00:00"
20317            };
20318            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20319
20320            // Resample to daily
20321            auto resampler = df.resample("D");
20322            pandas::DataFrame result = resampler.sum();
20323
20324            // Check that we got aggregated results
20325            bool passed = (result.nrows() <= df.nrows());
20326
20327            if (!passed) {
20328                std::cout << "  [FAIL] : in pd_test_timeseries_resample_basic() : resample didn't reduce rows" << std::endl;
20329                throw std::runtime_error("pd_test_timeseries_resample_basic failed");
20330            }
rolling (pd_test_1_all.cpp:20667)
20657#include <vector>
20658#include "../pandas/pd_series.h"
20659
20660namespace dataframe_tests {
20661    namespace dataframe_tests_windowing {
20662
20663        void pd_test_rolling_sum() {
20664            std::cout << "========= Rolling sum ===========================";
20665
20666            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
20667            auto result = s.rolling(3).sum();
20668
20669            // Window 3:
20670            // idx 0: [1] -> NaN (not enough values)
20671            // idx 1: [1,2] -> NaN (not enough values)
20672            // idx 2: [1,2,3] -> 6
20673            // idx 3: [2,3,4] -> 9
20674            // idx 4: [3,4,5] -> 12
20675            bool passed = result.size() == 5;
20676            if (!passed) {
20677                std::cout << "  [FAIL] : in pd_test_rolling_sum() : result size should be 5" << std::endl;
transform (pd_test_1_all.cpp:11071)
11062            std::cout << " -> tests passed" << std::endl;
11063        }
11064
11065        void pd_test_func_apply_series_transform() {
11066            std::cout << "========= Series transform ============================";
11067
11068            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070            // Transform must return same shape
11071            auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073            bool passed = true;
11074            if (result.size() != s.size()) {
11075                passed = false;
11076                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078            }
11079
11080            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081            for (size_t i = 0; i < result.size(); ++i) {
transform (pd_test_1_all.cpp:11071)
11062            std::cout << " -> tests passed" << std::endl;
11063        }
11064
11065        void pd_test_func_apply_series_transform() {
11066            std::cout << "========= Series transform ============================";
11067
11068            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0}, "values");
11069
11070            // Transform must return same shape
11071            auto result = s.transform([](double x) { return x * 2 + 1; });
11072
11073            bool passed = true;
11074            if (result.size() != s.size()) {
11075                passed = false;
11076                std::cout << "  [FAIL] : in pd_test_func_apply_series_transform() : size changed" << std::endl;
11077                throw std::runtime_error("pd_test_func_apply_series_transform failed: size changed");
11078            }
11079
11080            std::vector<double> expected = {3.0, 5.0, 7.0, 9.0};
11081            for (size_t i = 0; i < result.size(); ++i) {
transform_named_list (pd_test_3_all.cpp:27185)
27175    auto result = s.map_dict(mapping);
27176    check(result[0] == 100.0, "mapped 1->100");
27177    check(std::isnan(result[1]), "unmapped 5->NaN");
27178    check(result[2] == 300.0, "mapped 3->300");
27179}
27180
27181// Test 6: transform multi (list of named functions -> DataFrame)
27182void pd_test_transform_multi() {
27183    std::cout << "  -- pd_test_transform_multi --" << std::endl;
27184    Series<numpy::float64> s({4.0, 9.0, 16.0}, std::string("nums"));
27185    auto df = s.transform_named_list({"sqrt", "abs"});
27186    check(df.ncols() == 2, "2 columns");
27187    check(df.nrows() == 3, "3 rows");
27188    // Get sqrt column by index (first column = index 0)
27189    size_t sqrt_idx = df.get_column_index("sqrt");
27190    auto sqrt_series = df.column_to_series_f64(sqrt_idx);
27191    check(std::abs(sqrt_series[0] - 2.0) < 1e-10, "sqrt(4)==2");
27192    check(std::abs(sqrt_series[1] - 3.0) < 1e-10, "sqrt(9)==3");
27193    check(std::abs(sqrt_series[2] - 4.0) < 1e-10, "sqrt(16)==4");
27194    size_t abs_idx = df.get_column_index("abs");
27195    auto abs_series = df.column_to_series_f64(abs_idx);
transform_resolved (pd_test_5_all.cpp:98227)
98217    switch (cid) {
98218        case CbId::Int:   cb = cb_int(hist);    break;
98219        case CbId::Bool:  cb = cb_bool(hist);   break;
98220        case CbId::Float: cb = cb_float(hist);  break;
98221        case CbId::Str:   cb = cb_string(hist); break;
98222        case CbId::Mixed: cb = cb_mixed(hist);  break;
98223    }
98224
98225    pandas::Result r;
98226    try {
98227        r = s.transform_resolved(cb, hist);
98228    } catch (const std::exception& e) {
98229        std::string tag = std::string("transform src=") + src_name(sid) +
98230                          " cb=" + cb_name(cid) + " mode=" + mode_name(mid);
98231        std::cout << "[FAIL] : in f_27a_core_3094022_apply_resolved_typed_post_cb_dtype() "
98232                  << tag << " unexpected exception: " << e.what() << "\n";
98233        ++pandas_tests::g_failed; ++local_fail;
98234        ++pandas_tests::g_failed; ++local_fail;
98235        ++pandas_tests::g_failed; ++local_fail;
98236        return;
98237    }
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836    namespace dataframe_tests_arithmetic {
4837
4838        void pd_test_arithmetic_series_named_ops() {
4839            std::cout << "========= Series named ops ======================";
4840
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add (pd_test_1_all.cpp:4844)
4835namespace dataframe_tests {
4836    namespace dataframe_tests_arithmetic {
4837
4838        void pd_test_arithmetic_series_named_ops() {
4839            std::cout << "========= Series named ops ======================";
4840
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
add_dateoffset (pd_test_4_all.cpp:6330)
6320    std::vector<numpy::float64> v(ns.size());
6321    for (size_t i = 0; i < ns.size(); ++i) v[i] = static_cast<numpy::float64>(ns[i]);
6322    pandas::Series<numpy::float64> s(v);
6323    s.set_dtype_override("datetime64[ns]");
6324    return s;
6325}
6326
6327void test_add_dateoffset_tick_day() {
6328    auto s = make_dt_series({0, 86400000000000LL});
6329    pandas::Day d(5);
6330    auto out = s.add_dateoffset(d);
6331    EXPECT(out.size() == 2);
6332    EXPECT(static_cast<int64_t>(out[0]) == 5LL * 86400000000000LL);
6333    EXPECT(static_cast<int64_t>(out[1]) == 6LL * 86400000000000LL);
6334    EXPECT(out.dtype_name() == "datetime64[ns]");
6335}
6336
6337void test_sub_dateoffset_calendar_monthend() {
6338    // 2024-01-31 in ns
6339    int64_t jan31 = 1706659200LL * 1000000000LL;
6340    auto s = make_dt_series({jan31});
add_dateoffset_to_timedelta (pd_test_4_all.cpp:6397)
6387}
6388
6389void test_add_dateoffset_to_timedelta_precision() {
6390    int64_t a = 1LL << 54;
6391    int64_t b = a + 1;
6392    std::vector<numpy::float64> v = {static_cast<numpy::float64>(a),
6393                                     static_cast<numpy::float64>(b)};
6394    pandas::Series<numpy::float64> s(v);
6395    s.set_dtype_override("timedelta64[ns]");
6396    pandas::Day d(1);
6397    auto out = s.add_dateoffset_to_timedelta(d);
6398    int64_t add = 86400000000000LL;
6399    EXPECT(static_cast<int64_t>(out[0]) == a + add);
6400    // Note: float64 cannot precisely represent (a+1); this only verifies the
6401    // int64-space computation, not full lossless storage.
6402    EXPECT(static_cast<int64_t>(out[1]) - (a + add) <= 2);
6403}
6404
6405void test_groupby_by_callable_int_index() {
6406    std::vector<numpy::float64> v = {1.0, 2.0, 3.0, 4.0};
6407    pandas::Series<numpy::float64> s(v);
add_prefix (pd_test_2_all.cpp:4)
 2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
 3// dataframe_tests/pd_test_add_prefix.cpp
 4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
 5#include <iostream>
 6#include <stdexcept>
 7#include <vector>
 8#include <string>
 9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
add_suffix (pd_test_2_all.cpp:4)
 2// ------------------- pd_test_add_prefix.cpp (start) -----------------------------
 3// dataframe_tests/pd_test_add_prefix.cpp
 4// Tests for DataFrame.add_prefix() and add_suffix() methods (pandas 2.0+ API)
 5#include <iostream>
 6#include <stdexcept>
 7#include <vector>
 8#include <string>
 9#include <map>
10#include "../pandas/pd_dataframe.h"
11#include "../pandas/pd_groupby.h"
12
13// CRITICAL: No using namespace directives
div (pd_test_1_all.cpp:4865)
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870            }
4871
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
div (pd_test_1_all.cpp:4865)
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
4869                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: div failed");
4870            }
4871
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
divide (pd_test_3_all.cpp:555)
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
554
555    // Test divide()
556    pandas::Series<numpy::float64> div_result = s1.divide(s2);
557    if (div_result.size() != 4) {
558        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() size mismatch" << std::endl;
559        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide()");
560    }
561    // 10/2=5
562    if (std::abs(div_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
563        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : divide() value mismatch" << std::endl;
564        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: divide() value");
565    }
divmod (pd_test_3_all.cpp:12077)
12067    auto cov_val = s1.cov(s2);
12068    if (!cov_val.has_value()) {
12069        std::cout << "  [FAIL] : covariance should have a value" << std::endl;
12070        throw std::runtime_error("pd_test_series_corr_cov failed");
12071    }
12072
12073    std::cout << " -> tests passed" << std::endl;
12074}
12075
12076// ============================================================================
12077// Test 9: divmod()
12078// ============================================================================
12079void pd_test_series_divmod() {
12080    std::cout << "========= Series.divmod() ==========================";
12081
12082    std::vector<double> vals = {10.0, 20.0, 30.0};
12083    pandas::Series<double> s(vals, "test");
12084
12085    auto [quot, rem] = s.divmod(7.0);
12086
12087    // 10/7 = 1 remainder 3
divmod (pd_test_3_all.cpp:12077)
12067    auto cov_val = s1.cov(s2);
12068    if (!cov_val.has_value()) {
12069        std::cout << "  [FAIL] : covariance should have a value" << std::endl;
12070        throw std::runtime_error("pd_test_series_corr_cov failed");
12071    }
12072
12073    std::cout << " -> tests passed" << std::endl;
12074}
12075
12076// ============================================================================
12077// Test 9: divmod()
12078// ============================================================================
12079void pd_test_series_divmod() {
12080    std::cout << "========= Series.divmod() ==========================";
12081
12082    std::vector<double> vals = {10.0, 20.0, 30.0};
12083    pandas::Series<double> s(vals, "test");
12084
12085    auto [quot, rem] = s.divmod(7.0);
12086
12087    // 10/7 = 1 remainder 3
dot (pd_test_1_all.cpp:22594)
22585        std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586        return 0;
22587    }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604    namespace dataframe_tests_dot {
dot (pd_test_1_all.cpp:22594)
22585        std::cout << "====================================== [OK] pd_test_all_any test suite ========================== " << std::endl;
22586        return 0;
22587    }
22588
22589} // namespace dataframe_tests
22590// ------------------- pd_test_all_any.cpp (end) -----------------------------
22591
22592// ------------------- pd_test_dot.cpp (start) -----------------------------
22593// dataframe_tests/pd_test_dot.cpp
22594// Test DataFrame.dot() method - matrix multiplication
22595
22596#include <iostream>
22597#include <stdexcept>
22598#include <cmath>
22599#include "../pandas/pd_dataframe.h"
22600
22601// CRITICAL: No using namespace directives
22602
22603namespace dataframe_tests {
22604    namespace dataframe_tests_dot {
floordiv (pd_test_1_all.cpp:4881)
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
4876            std::cout << "========= Series floordiv/mod ===================";
4877
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
floordiv (pd_test_1_all.cpp:4881)
4872            std::cout << " -> tests passed" << std::endl;
4873        }
4874
4875        void pd_test_arithmetic_series_floordiv_mod() {
4876            std::cout << "========= Series floordiv/mod ===================";
4877
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
mod (pd_test_1_all.cpp:4888)
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893            }
4894
4895            // Scalar operations
4896            auto fd_scalar = a.floordiv(2.0);
4897            passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898            if (!passed) {
mod (pd_test_1_all.cpp:4888)
4878            pandas::Series<double> a({7.0, 8.0, 9.0});
4879            pandas::Series<double> b({2.0, 3.0, 4.0});
4880
4881            auto fd = a.floordiv(b);
4882            bool passed = std::abs(fd[0] - 3.0) < 0.001;  // 7 // 2 = 3
4883            if (!passed) {
4884                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : floordiv failed" << std::endl;
4885                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: floordiv failed");
4886            }
4887
4888            auto m = a.mod(b);
4889            passed = std::abs(m[0] - 1.0) < 0.001;  // 7 % 2 = 1
4890            if (!passed) {
4891                std::cout << "  [FAIL] : in pd_test_arithmetic_series_floordiv_mod() : mod failed" << std::endl;
4892                throw std::runtime_error("pd_test_arithmetic_series_floordiv_mod failed: mod failed");
4893            }
4894
4895            // Scalar operations
4896            auto fd_scalar = a.floordiv(2.0);
4897            passed = std::abs(fd_scalar[0] - 3.0) < 0.001 && std::abs(fd_scalar[1] - 4.0) < 0.001;
4898            if (!passed) {
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
mul (pd_test_1_all.cpp:4858)
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
4862                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: mul failed");
4863            }
4864
4865            auto quot = a.div(b);
4866            passed = std::abs(quot[0] - 0.25) < 0.001;
4867            if (!passed) {
4868                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : div failed" << std::endl;
multiindex (pd_test_1_all.cpp:27024)
27014            pandas::DataFrame df(data);
27015
27016            auto result = df.value_counts();
27017            auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27018
27019            if (!counts.has_multiindex()) {
27020                std::cout << "  [FAIL] : expected MultiIndex" << std::endl;
27021                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27022            }
27023
27024            const auto& midx = counts.multiindex();
27025
27026            // Should have 2 levels
27027            if (midx.nlevels() != 2) {
27028                std::cout << "  [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27029                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
27030            }
27031
27032            std::cout << " -> tests passed" << std::endl;
27033        }
multiply (pd_test_3_all.cpp:543)
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
542
543    // Test multiply()
544    pandas::Series<double> mul_result = s1.multiply(s2);
545    if (mul_result.size() != 4) {
546        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() size mismatch" << std::endl;
547        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply()");
548    }
549    // 10*2=20
550    if (std::abs(mul_result[static_cast<size_t>(0)] - 20.0) > 0.001) {
551        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : multiply() value mismatch" << std::endl;
552        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: multiply() value");
553    }
pow (pd_test_1_all.cpp:4911)
4901            }
4902
4903            std::cout << " -> tests passed" << std::endl;
4904        }
4905
4906        void pd_test_arithmetic_series_pow() {
4907            std::cout << "========= Series pow ============================";
4908
4909            pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911            auto p = a.pow(2.0);
4912            bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913            if (!passed) {
4914                std::cout << "  [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915                throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916            }
4917
4918            // Series pow Series
4919            pandas::Series<double> exp({1.0, 2.0, 0.5});
4920            auto p2 = a.pow(exp);
4921            passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001;  // 3^2=9
pow (pd_test_1_all.cpp:4911)
4901            }
4902
4903            std::cout << " -> tests passed" << std::endl;
4904        }
4905
4906        void pd_test_arithmetic_series_pow() {
4907            std::cout << "========= Series pow ============================";
4908
4909            pandas::Series<double> a({2.0, 3.0, 4.0});
4910
4911            auto p = a.pow(2.0);
4912            bool passed = std::abs(p[0] - 4.0) < 0.001 && std::abs(p[1] - 9.0) < 0.001 && std::abs(p[2] - 16.0) < 0.001;
4913            if (!passed) {
4914                std::cout << "  [FAIL] : in pd_test_arithmetic_series_pow() : pow scalar failed" << std::endl;
4915                throw std::runtime_error("pd_test_arithmetic_series_pow failed: pow scalar failed");
4916            }
4917
4918            // Series pow Series
4919            pandas::Series<double> exp({1.0, 2.0, 0.5});
4920            auto p2 = a.pow(exp);
4921            passed = std::abs(p2[0] - 2.0) < 0.001 && std::abs(p2[1] - 9.0) < 0.001;  // 3^2=9
radd (pd_test_2_all.cpp:7440)
7430            if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431            return std::abs(a - b) < tol;
7432        }
7433
7434        // Helper to get double value from DataFrame at position
7435        double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436            return df.iloc<numpy::float64>(row, col);
7437        }
7438
7439        void pd_test_radd_scalar() {
7440            std::cout << "========= radd() with scalar =====================";
7441
7442            // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443            std::map<std::string, std::vector<double>> data = {
7444                {"angles", {0.0, 3.0, 4.0}},
7445                {"degrees", {360.0, 180.0, 360.0}}
7446            };
7447            pandas::DataFrame df(data);
7448
7449            // df.radd(1) should be equivalent to 1 + df
7450            pandas::DataFrame result = df.radd(1.0);
radd (pd_test_2_all.cpp:7440)
7430            if (std::isinf(a) && std::isinf(b)) return (a > 0) == (b > 0);
7431            return std::abs(a - b) < tol;
7432        }
7433
7434        // Helper to get double value from DataFrame at position
7435        double get_val(const pandas::DataFrame& df, size_t row, size_t col) {
7436            return df.iloc<numpy::float64>(row, col);
7437        }
7438
7439        void pd_test_radd_scalar() {
7440            std::cout << "========= radd() with scalar =====================";
7441
7442            // Create DataFrame: angles=[0, 3, 4], degrees=[360, 180, 360]
7443            std::map<std::string, std::vector<double>> data = {
7444                {"angles", {0.0, 3.0, 4.0}},
7445                {"degrees", {360.0, 180.0, 360.0}}
7446            };
7447            pandas::DataFrame df(data);
7448
7449            // df.radd(1) should be equivalent to 1 + df
7450            pandas::DataFrame result = df.radd(1.0);
rdiv (pd_test_2_all.cpp:7713)
7703            }
7704
7705            if (!passed) {
7706                throw std::runtime_error("pd_test_rmul_with_fill_value failed");
7707            }
7708
7709            std::cout << " -> tests passed" << std::endl;
7710        }
7711
7712        void pd_test_rdiv_scalar() {
7713            std::cout << "========= rdiv() with scalar =====================";
7714
7715            // From pandas docs example: df.rdiv(10) divides 10 BY the dataframe
7716            std::map<std::string, std::vector<double>> data = {
7717                {"angles", {0.0, 3.0, 4.0}},
7718                {"degrees", {360.0, 180.0, 360.0}}
7719            };
7720            pandas::DataFrame df(data);
7721
7722            // df.rdiv(10) = 10 / df
7723            pandas::DataFrame result = df.rdiv(10.0);
rdivmod (pd_test_3_all.cpp:9176)
9166    // Check: val[2]=3, no condition matches -> default 0.0
9167    if (std::abs(result[static_cast<size_t>(2)] - 0.0) > 0.001) {
9168        std::cout << "  [FAIL] : in pd_test_3_all_series_case_when() : default value wrong" << std::endl;
9169        throw std::runtime_error("pd_test_3_all_series_case_when failed: default");
9170    }
9171
9172    std::cout << " -> tests passed" << std::endl;
9173}
9174
9175void pd_test_3_all_series_rdivmod() {
9176    std::cout << "========= Series.rdivmod() ========================";
9177
9178    std::vector<double> vals = {2.0, 3.0, 4.0, 5.0};
9179    pandas::Series<double> s(vals, "test");
9180
9181    // rdivmod: divmod(scalar, series) -> (floor quotient, remainder)
9182    // floor(10 / 2) = 5, 10 % 2 = 0
9183    // floor(10 / 3) = 3, 10 % 3 = 1
9184    // floor(10 / 4) = 2, 10 % 4 = 2
9185    // floor(10 / 5) = 2, 10 % 5 = 0
9186    auto [quotients, remainders] = s.rdivmod(10.0);
rfloordiv (pd_test_2_all.cpp:7909)
7899            }
7900
7901            if (!passed) {
7902                throw std::runtime_error("pd_test_rtruediv_with_fill_value failed");
7903            }
7904
7905            std::cout << " -> tests passed" << std::endl;
7906        }
7907
7908        void pd_test_rfloordiv_scalar() {
7909            std::cout << "========= rfloordiv() with scalar ================";
7910
7911            std::map<std::string, std::vector<double>> data = {
7912                {"A", {3.0, 4.0}},
7913                {"B", {7.0, 8.0}}
7914            };
7915            pandas::DataFrame df(data);
7916
7917            // df.rfloordiv(10) = 10 // df (floor division)
7918            pandas::DataFrame result = df.rfloordiv(10.0);
rmod (pd_test_2_all.cpp:8121)
8111            }
8112
8113            if (!passed) {
8114                throw std::runtime_error("pd_test_rfloordiv_division_by_zero failed");
8115            }
8116
8117            std::cout << " -> tests passed" << std::endl;
8118        }
8119
8120        void pd_test_rmod_scalar() {
8121            std::cout << "========= rmod() with scalar =====================";
8122
8123            std::map<std::string, std::vector<double>> data = {
8124                {"A", {3.0, 4.0}}
8125            };
8126            pandas::DataFrame df(data);
8127
8128            // df.rmod(10) = 10 % df
8129            pandas::DataFrame result = df.rmod(10.0);
8130
8131            bool passed = true;
rmul (pd_test_2_all.cpp:7591)
7581            }
7582
7583            if (!passed) {
7584                throw std::runtime_error("pd_test_rsub_dataframe failed");
7585            }
7586
7587            std::cout << " -> tests passed" << std::endl;
7588        }
7589
7590        void pd_test_rmul_scalar() {
7591            std::cout << "========= rmul() with scalar =====================";
7592
7593            std::map<std::string, std::vector<double>> data = {
7594                {"A", {2.0, 3.0}},
7595                {"B", {4.0, 5.0}}
7596            };
7597            pandas::DataFrame df(data);
7598
7599            // df.rmul(10) = 10 * df
7600            pandas::DataFrame result = df.rmul(10.0);
rmul (pd_test_2_all.cpp:7591)
7581            }
7582
7583            if (!passed) {
7584                throw std::runtime_error("pd_test_rsub_dataframe failed");
7585            }
7586
7587            std::cout << " -> tests passed" << std::endl;
7588        }
7589
7590        void pd_test_rmul_scalar() {
7591            std::cout << "========= rmul() with scalar =====================";
7592
7593            std::map<std::string, std::vector<double>> data = {
7594                {"A", {2.0, 3.0}},
7595                {"B", {4.0, 5.0}}
7596            };
7597            pandas::DataFrame df(data);
7598
7599            // df.rmul(10) = 10 * df
7600            pandas::DataFrame result = df.rmul(10.0);
rpow (pd_test_2_all.cpp:8327)
8317            }
8318
8319            if (!passed) {
8320                throw std::runtime_error("pd_test_rmod_modulo_by_zero failed");
8321            }
8322
8323            std::cout << " -> tests passed" << std::endl;
8324        }
8325
8326        void pd_test_rpow_scalar() {
8327            std::cout << "========= rpow() with scalar =====================";
8328
8329            std::map<std::string, std::vector<double>> data = {
8330                {"A", {2.0, 3.0}},
8331                {"B", {0.0, 1.0}}
8332            };
8333            pandas::DataFrame df(data);
8334
8335            // df.rpow(2) = 2 ** df
8336            pandas::DataFrame result = df.rpow(2.0);
rsub (pd_test_2_all.cpp:7520)
7510            }
7511
7512            if (!passed) {
7513                throw std::runtime_error("pd_test_radd_dataframe failed");
7514            }
7515
7516            std::cout << " -> tests passed" << std::endl;
7517        }
7518
7519        void pd_test_rsub_scalar() {
7520            std::cout << "========= rsub() with scalar =====================";
7521
7522            std::map<std::string, std::vector<double>> data = {
7523                {"A", {1.0, 2.0, 3.0}},
7524                {"B", {4.0, 5.0, 6.0}}
7525            };
7526            pandas::DataFrame df(data);
7527
7528            // df.rsub(10) = 10 - df
7529            pandas::DataFrame result = df.rsub(10.0);
rsub (pd_test_2_all.cpp:7520)
7510            }
7511
7512            if (!passed) {
7513                throw std::runtime_error("pd_test_radd_dataframe failed");
7514            }
7515
7516            std::cout << " -> tests passed" << std::endl;
7517        }
7518
7519        void pd_test_rsub_scalar() {
7520            std::cout << "========= rsub() with scalar =====================";
7521
7522            std::map<std::string, std::vector<double>> data = {
7523                {"A", {1.0, 2.0, 3.0}},
7524                {"B", {4.0, 5.0, 6.0}}
7525            };
7526            pandas::DataFrame df(data);
7527
7528            // df.rsub(10) = 10 - df
7529            pandas::DataFrame result = df.rsub(10.0);
rtruediv (pd_test_2_all.cpp:7795)
7785            }
7786
7787            if (!passed) {
7788                throw std::runtime_error("pd_test_rdiv_dataframe failed");
7789            }
7790
7791            std::cout << " -> tests passed" << std::endl;
7792        }
7793
7794        void pd_test_rtruediv_scalar() {
7795            std::cout << "========= rtruediv() with scalar =================";
7796
7797            std::map<std::string, std::vector<double>> data = {
7798                {"A", {2.0, 4.0}}
7799            };
7800            pandas::DataFrame df(data);
7801
7802            // rtruediv is an alias for rdiv
7803            pandas::DataFrame result = df.rtruediv(10.0);
7804
7805            bool passed = true;
sub (pd_test_1_all.cpp:4851)
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub (pd_test_1_all.cpp:4851)
4841            pandas::Series<double> a({1.0, 2.0, 3.0});
4842            pandas::Series<double> b({4.0, 5.0, 6.0});
4843
4844            auto sum = a.add(b);
4845            bool passed = std::abs(sum[0] - 5.0) < 0.001 && std::abs(sum[1] - 7.0) < 0.001;
4846            if (!passed) {
4847                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : add failed" << std::endl;
4848                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: add failed");
4849            }
4850
4851            auto diff = a.sub(b);
4852            passed = std::abs(diff[0] - (-3.0)) < 0.001;
4853            if (!passed) {
4854                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : sub failed" << std::endl;
4855                throw std::runtime_error("pd_test_arithmetic_series_named_ops failed: sub failed");
4856            }
4857
4858            auto prod = a.mul(b);
4859            passed = std::abs(prod[0] - 4.0) < 0.001 && std::abs(prod[1] - 10.0) < 0.001;
4860            if (!passed) {
4861                std::cout << "  [FAIL] : in pd_test_arithmetic_series_named_ops() : mul failed" << std::endl;
sub_dateoffset (pd_test_4_all.cpp:6342)
6332    EXPECT(static_cast<int64_t>(out[0]) == 5LL * 86400000000000LL);
6333    EXPECT(static_cast<int64_t>(out[1]) == 6LL * 86400000000000LL);
6334    EXPECT(out.dtype_name() == "datetime64[ns]");
6335}
6336
6337void test_sub_dateoffset_calendar_monthend() {
6338    // 2024-01-31 in ns
6339    int64_t jan31 = 1706659200LL * 1000000000LL;
6340    auto s = make_dt_series({jan31});
6341    pandas::MonthEnd me(1);
6342    auto out = s.sub_dateoffset(me);
6343    auto neg = me.negate();
6344    auto ref = s.add_dateoffset(*neg);
6345    EXPECT(out.size() == 1);
6346    EXPECT(static_cast<int64_t>(out[0]) == static_cast<int64_t>(ref[0]));
6347    EXPECT(out.dtype_name() == "datetime64[ns]");
6348}
6349
6350void test_sub_dateoffset_equals_add_negated() {
6351    int64_t jan31 = 1706659200LL * 1000000000LL;
6352    auto s = make_dt_series({jan31, jan31 + 86400000000000LL});
subtract (pd_test_3_all.cpp:531)
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
535        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract()");
536    }
537    // 10-2=8
538    if (std::abs(sub_result[static_cast<size_t>(0)] - 8.0) > 0.001) {
539        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() value mismatch" << std::endl;
540        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: subtract() value");
541    }
truediv (pd_test_3_all.cpp:524)
514    }
515
516    std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
truediv (pd_test_3_all.cpp:524)
514    }
515
516    std::cout << " -> tests passed" << std::endl;
517}
518
519// ============================================================================
520// Category 3: Series Arithmetic Operations
521// ============================================================================
522
523void pd_test_3_all_series_arithmetic() {
524    std::cout << "========= Series.subtract/multiply/divide/truediv() =";
525
526    std::vector<double> vals1 = {10.0, 20.0, 30.0, 40.0};
527    std::vector<double> vals2 = {2.0, 4.0, 6.0, 8.0};
528    pandas::Series<double> s1(vals1, "s1");
529    pandas::Series<double> s2(vals2, "s2");
530
531    // Test subtract()
532    pandas::Series<double> sub_result = s1.subtract(s2);
533    if (sub_result.size() != 4) {
534        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : subtract() size mismatch" << std::endl;
compare (pd_test_1_all.cpp:13989)
13979            if (!approx_equal(std::stod(b_col.get_value_str(0)), 10.0)) {
13980                passed = false;
13981                std::cout << "  [FAIL] : in pd_test_joining_update() : column B was changed" << std::endl;
13982                throw std::runtime_error("pd_test_joining_update failed: B changed");
13983            }
13984
13985            std::cout << " -> tests passed" << std::endl;
13986        }
13987
13988        // =====================================================================
13989        // compare() Tests
13990        // =====================================================================
13991
13992        void pd_test_joining_compare() {
13993            std::cout << "========= compare =====================================";
13994
13995            std::map<std::string, std::vector<double>> left_data = {
13996                {"A", {1.0, 2.0, 3.0}},
13997                {"B", {10.0, 20.0, 30.0}}
13998            };
13999            pandas::DataFrame left(left_data);
eq (pd_test_2_all.cpp:19680)
19670    std::vector<pandas::Series<numpy::float64>> cols;
19671    cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672    cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673    pandas::DataFrame df(cols, {"A", "B"});
19674
19675    pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676    s.set_index(std::make_unique<pandas::Index<std::string>>(
19677        std::vector<std::string>{"A", "B"}));
19678
19679    auto result = df.eq(s, 1);
19680
19681    check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682    check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683    check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684    check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689    std::cout << "  -- pd_test_broadcasting_mul_scalar --" << std::endl;
eq (pd_test_2_all.cpp:19680)
19670    std::vector<pandas::Series<numpy::float64>> cols;
19671    cols.push_back(pandas::Series<numpy::float64>({1.0, 2.0}, "A"));
19672    cols.push_back(pandas::Series<numpy::float64>({3.0, 3.0}, "B"));
19673    pandas::DataFrame df(cols, {"A", "B"});
19674
19675    pandas::Series<numpy::float64> s({1.0, 3.0}, "vals");
19676    s.set_index(std::make_unique<pandas::Index<std::string>>(
19677        std::vector<std::string>{"A", "B"}));
19678
19679    auto result = df.eq(s, 1);
19680
19681    check(approx(result["A"].get_value_double(0), 1.0), "eq_A_r0_true");
19682    check(approx(result["A"].get_value_double(1), 0.0), "eq_A_r1_false");
19683    check(approx(result["B"].get_value_double(0), 1.0), "eq_B_r0_true");
19684    check(approx(result["B"].get_value_double(1), 1.0), "eq_B_r1_true");
19685}
19686
19687// Test 5: mul scalar broadcast (verify existing behavior still works)
19688void pd_test_broadcasting_mul_scalar() {
19689    std::cout << "  -- pd_test_broadcasting_mul_scalar --" << std::endl;
equals (pd_test_1_all.cpp:5866)
5856    std::cout << "========= equals ======================================";
5857
5858    pandas::CategoricalArray arr1({"a", "b", "a"});
5859    pandas::CategoricalArray arr2({"a", "b", "a"});
5860    pandas::CategoricalArray arr3({"a", "b", "c"});
5861
5862    pandas::CategoricalIndex idx1(arr1);
5863    pandas::CategoricalIndex idx2(arr2);
5864    pandas::CategoricalIndex idx3(arr3);
5865
5866    bool passed = (idx1.equals(idx2) && !idx1.equals(idx3));
5867    if (!passed) {
5868        std::cout << "  [FAIL] : in pd_test_categorical_index_equals()" << std::endl;
5869        throw std::runtime_error("pd_test_categorical_index_equals failed");
5870    }
5871
5872    std::cout << " -> tests passed" << std::endl;
5873}
5874
5875void pd_test_categorical_index_identical() {
5876    std::cout << "========= identical ===================================";
ge (pd_test_3_all.cpp:303)
293    }
294
295    std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303    std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305    std::map<std::string, std::vector<double>> data1 = {
306        {"A", {1.0, 2.0, 3.0}},
307        {"B", {4.0, 5.0, 6.0}}
308    };
309    std::map<std::string, std::vector<double>> data2 = {
310        {"A", {1.0, 3.0, 3.0}},
311        {"B", {4.0, 4.0, 7.0}}
312    };
313    pandas::DataFrame df1(data1);
ge (pd_test_3_all.cpp:303)
293    }
294
295    std::cout << " -> tests passed" << std::endl;
296}
297
298// ============================================================================
299// Category 2: DataFrame Comparison Operations
300// ============================================================================
301
302void pd_test_3_all_comparison_ops() {
303    std::cout << "========= DataFrame.eq/ne/lt/le/gt/ge() =============";
304
305    std::map<std::string, std::vector<double>> data1 = {
306        {"A", {1.0, 2.0, 3.0}},
307        {"B", {4.0, 5.0, 6.0}}
308    };
309    std::map<std::string, std::vector<double>> data2 = {
310        {"A", {1.0, 3.0, 3.0}},
311        {"B", {4.0, 4.0, 7.0}}
312    };
313    pandas::DataFrame df1(data1);
gen (pd_test_5_all.cpp:35852)
35842    double pc = pct_change_pc(a, b);
35843    double pd = pct_change_pd(a, b);
35844    pandas_tests::check(std::abs(pc - pd) < 1e-12,
35845                        "case_12.formulas_within_ULP", local_fail);
35846}
35847
35848void bin_edge_412638_case_13_entropy_pct_change_invariance(int& local_fail) {
35849    // Generate prices via deterministic walk; compute returns by both
35850    // formulas; bin both; entropy should be IDENTICAL (bin assignments
35851    // not shifted by ULP-scale formula drift). Cycle-1 finding.
35852    std::mt19937_64 gen(42);
35853    std::normal_distribution<double> nd(0.0003, 0.02);
35854    std::vector<double> prices;
35855    prices.reserve(500);
35856    double s = 100.0;
35857    for (int i = 0; i < 500; ++i) {
35858        if (i > 0) s = s * std::exp(nd(gen));
35859        prices.push_back(s);
35860    }
35861    std::vector<double> r_pc, r_pd;
35862    for (size_t i = 1; i < prices.size(); ++i) {
gt (pd_test_3_all.cpp:344)
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348        throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349    }
350
351    // Test ge()
352    pandas::DataFrame ge_result = df1.ge(df2);
353    if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
gt (pd_test_3_all.cpp:344)
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
348        throw std::runtime_error("pd_test_3_all_comparison_ops failed: gt() shape");
349    }
350
351    // Test ge()
352    pandas::DataFrame ge_result = df1.ge(df2);
353    if (ge_result.nrows() != 3 || ge_result.ncols() != 2) {
354        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ge() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
le (pd_test_3_all.cpp:337)
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
341        throw std::runtime_error("pd_test_3_all_comparison_ops failed: le() shape");
342    }
343
344    // Test gt()
345    pandas::DataFrame gt_result = df1.gt(df2);
346    if (gt_result.nrows() != 3 || gt_result.ncols() != 2) {
347        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : gt() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
lt (pd_test_3_all.cpp:330)
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
334        throw std::runtime_error("pd_test_3_all_comparison_ops failed: lt() shape");
335    }
336
337    // Test le()
338    pandas::DataFrame le_result = df1.le(df2);
339    if (le_result.nrows() != 3 || le_result.ncols() != 2) {
340        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : le() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313    pandas::DataFrame df1(data1);
314    pandas::DataFrame df2(data2);
315
316    // Test eq()
317    pandas::DataFrame eq_result = df1.eq(df2);
318    if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
ne (pd_test_3_all.cpp:323)
313    pandas::DataFrame df1(data1);
314    pandas::DataFrame df2(data2);
315
316    // Test eq()
317    pandas::DataFrame eq_result = df1.eq(df2);
318    if (eq_result.nrows() != 3 || eq_result.ncols() != 2) {
319        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : eq() shape mismatch" << std::endl;
320        throw std::runtime_error("pd_test_3_all_comparison_ops failed: eq() shape");
321    }
322
323    // Test ne()
324    pandas::DataFrame ne_result = df1.ne(df2);
325    if (ne_result.nrows() != 3 || ne_result.ncols() != 2) {
326        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : ne() shape mismatch" << std::endl;
327        throw std::runtime_error("pd_test_3_all_comparison_ops failed: ne() shape");
328    }
329
330    // Test lt()
331    pandas::DataFrame lt_result = df1.lt(df2);
332    if (lt_result.nrows() != 3 || lt_result.ncols() != 2) {
333        std::cout << "  [FAIL] : in pd_test_3_all_comparison_ops() : lt() shape mismatch" << std::endl;
argsort (pd_test_1_all.cpp:1304)
1294        std::cout << "========= DatetimeArray: sorting ======================= ";
1295
1296        pandas::DatetimeArray arr(std::vector<std::string>{
1297            "2023-06-15",
1298            "NaT",
1299            "2023-01-01",
1300            "2023-12-31"
1301        });
1302
1303        // argsort ascending
1304        auto indices = arr.argsort(true, "last");
1305        // Expected order: 2023-01-01(2), 2023-06-15(0), 2023-12-31(3), NaT(1)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
rank (pd_test_1_all.cpp:6451)
6441        // =====================================================================
6442        // Test: Rank
6443        // =====================================================================
6444        void pd_test_dataframe_rank() {
6445            std::cout << "========= rank =============================";
6446
6447            // Test Series rank with default method (average)
6448            {
6449                std::vector<double> data = {3.0, 1.0, 4.0, 1.0, 5.0};
6450                pandas::Series<double> s(data, "test");
6451                auto ranked = s.rank();
6452
6453                // Values: 3, 1, 4, 1, 5 -> Sorted: 1, 1, 3, 4, 5
6454                // Ranks (average): 1.5, 1.5, 3, 4, 5
6455                // Original positions: 3->3, 1->1.5, 4->4, 1->1.5, 5->5
6456                double r0 = std::stod(ranked.get_value_str(0));  // 3.0 -> rank 3
6457                double r1 = std::stod(ranked.get_value_str(1));  // 1.0 -> rank 1.5
6458
6459                if (std::abs(r0 - 3.0) > 1e-10) {
6460                    std::cout << "  [FAIL] : in pd_test_dataframe_rank() : value 3.0 should have rank 3, got " << r0 << std::endl;
6461                    throw std::runtime_error("pd_test_dataframe_rank failed: value 3.0 rank");
searchsorted (pd_test_1_all.cpp:18958)
18948    // =========================================================================
18949    // Search Tests
18950    // =========================================================================
18951
18952    void pd_test_range_index_searchsorted() {
18953        std::cout << "========= searchsorted ================================ ";
18954
18955        pandas::RangeIndex ri(0, 10, 2);  // [0, 2, 4, 6, 8]
18956
18957        bool passed = (ri.searchsorted(4, "left") == 2 &&
18958                      ri.searchsorted(4, "right") == 3 &&
18959                      ri.searchsorted(3, "left") == 2 &&   // 3 would go between 2 and 4
18960                      ri.searchsorted(-1, "left") == 0 &&  // Before all
18961                      ri.searchsorted(10, "left") == 5);   // After all
18962
18963        if (!passed) {
18964            std::cout << "  [FAIL] : searchsorted" << std::endl;
18965            throw std::runtime_error("pd_test_range_index_searchsorted failed");
18966        }
sort_index (pd_test_3_all.cpp:583)
573    // 10/2=5
574    if (std::abs(truediv_result[static_cast<size_t>(0)] - 5.0) > 0.001) {
575        std::cout << "  [FAIL] : in pd_test_3_all_series_arithmetic() : truediv() value mismatch" << std::endl;
576        throw std::runtime_error("pd_test_3_all_series_arithmetic failed: truediv() value");
577    }
578
579    std::cout << " -> tests passed" << std::endl;
580}
581
582void pd_test_3_all_series_sort_index() {
583    std::cout << "========= Series.sort_index() ========================";
584
585    // NOTE: Series.sort_index() has an implementation issue:
586    // It calls index_->argsort() but argsort() is not virtual in IndexBase.
587    // This test verifies the function signature exists.
588    // When the implementation is fixed, this test should be updated.
589
590    std::vector<double> vals = {30.0, 10.0, 20.0};
591    pandas::Series<double> s(vals, "test");
592
593    // Verify the Series was created correctly
sort_values (pd_test_1_all.cpp:6408)
6398        void pd_test_dataframe_sorting() {
6399            std::cout << "========= sorting ==========================";
6400
6401            std::map<std::string, std::vector<numpy::float64>> data;
6402            data["A"] = {3.0, 1.0, 4.0, 1.0, 5.0};
6403            data["B"] = {9.0, 2.0, 6.0, 5.0, 3.0};
6404
6405            pandas::DataFrame df(data);
6406
6407            // Test sort_values ascending
6408            auto sorted_asc = df.sort_values("A", true);
6409            // First value should be smallest (1.0)
6410            std::string first_val = sorted_asc["A"].get_value_str(0);
6411            if (std::stod(first_val) != 1.0) {
6412                std::cout << "  [FAIL] : in pd_test_dataframe_sorting() : sort_values asc first != 1" << std::endl;
6413                throw std::runtime_error("pd_test_dataframe_sorting failed: sort_values asc first != 1");
6414            }
6415
6416            // Test sort_values descending
6417            auto sorted_desc = df.sort_values("A", false);
6418            first_val = sorted_desc["A"].get_value_str(0);
T_ (pd_test_1_all.cpp:16634)
16624        // =====================================================================
16625        // Transpose Tests
16626        // =====================================================================
16627
16628        void pd_test_ndframe_transpose() {
16629            std::cout << "========= transpose ============================================" << std::endl;
16630
16631            pandas::Series<int> s({1, 2, 3});
16632
16633            // For Series, T_() returns a copy
16634            auto transposed = s.T_();
16635            bool passed = transposed.size() == s.size();
16636            if (!passed) {
16637                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16638                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16639            }
16640
16641            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16642            if (!passed) {
16643                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
explode (pd_test_1_all.cpp:6868)
6858                }
6859            }
6860
6861            // Test explode
6862            {
6863                std::map<std::string, std::vector<std::string>> data;
6864                data["id"] = {"1", "2"};
6865                data["tags"] = {"a,b,c", "d,e"};
6866                pandas::DataFrame df(data);
6867
6868                auto exploded = df.explode("tags");
6869                if (exploded.nrows() != 5) {  // 3 + 2 = 5 rows
6870                    std::cout << "  [FAIL] : in pd_test_dataframe_reshape() : explode nrows != 5, got " << exploded.nrows() << std::endl;
6871                    throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872                }
6873            }
6874
6875            // Test squeeze
6876            {
6877                std::map<std::string, std::vector<int>> data;
6878                data["A"] = {1};
squeeze (pd_test_1_all.cpp:6881)
6871                    throw std::runtime_error("pd_test_dataframe_reshape failed: explode nrows");
6872                }
6873            }
6874
6875            // Test squeeze
6876            {
6877                std::map<std::string, std::vector<int>> data;
6878                data["A"] = {1};
6879                pandas::DataFrame df(data);
6880
6881                auto squeezed = df.squeeze();
6882                // Should return without error for 1x1 DataFrame
6883            }
6884
6885            // Test stack
6886            {
6887                std::map<std::string, std::vector<int>> data;
6888                data["A"] = {1, 2};
6889                data["B"] = {3, 4};
6890                pandas::DataFrame df(data);
swapaxes (pd_test_3_all.cpp:2276)
2266    auto sorted_desc = arr.sort_values(false, "last");
2267    if (*sorted_desc[0] != "c" || *sorted_desc[1] != "b" ||
2268        *sorted_desc[2] != "a" || sorted_desc[3].has_value()) {
2269        throw std::runtime_error("sort_values descending failed");
2270    }
2271
2272    std::cout << " -> tests passed" << std::endl;
2273}
2274
2275void pd_test_3_all_categorical_swapaxes() {
2276    std::cout << "========= CategoricalArray.swapaxes() =================";
2277
2278    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2279    pandas::CategoricalArray arr(values);
2280
2281    auto result = arr.swapaxes(0, 0);
2282    if (result.size() != 3) {
2283        throw std::runtime_error("swapaxes failed");
2284    }
2285
2286    bool threw = false;
to_frame (pd_test_3_all.cpp:4931)
4921    size_t usage = mi.memory_usage(true);
4922    if (usage == 0) {
4923        throw std::runtime_error("memory_usage() should return > 0");
4924    }
4925
4926    std::cout << " -> tests passed" << std::endl;
4927}
4928
4929void pd_test_3_all_multiindex_to_frame() {
4930    std::cout << "========= MultiIndex.to_frame() =======================";
4931
4932    std::vector<std::vector<std::string>> arrays = {{"a", "b"}, {"x", "y"}};
4933    std::vector<std::optional<std::string>> names = {"first", "second"};
4934    pandas::MultiIndex mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
4935
4936    auto frame = mi.to_frame();
4937    if (frame.find("first") == frame.end() || frame.find("second") == frame.end()) {
4938        throw std::runtime_error("to_frame() missing columns");
4939    }
transpose (pd_test_1_all.cpp:16648)
16638                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() size" << std::endl;
16639                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() size");
16640            }
16641
16642            passed = transposed[0] == 1 && transposed[1] == 2 && transposed[2] == 3;
16643            if (!passed) {
16644                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : T_() values" << std::endl;
16645                throw std::runtime_error("pd_test_ndframe_transpose failed: T_() values");
16646            }
16647
16648            // Test transpose() alias
16649            auto transposed2 = s.transpose();
16650            passed = transposed2.size() == s.size();
16651            if (!passed) {
16652                std::cout << "  [FAIL] : in pd_test_ndframe_transpose() : transpose() size" << std::endl;
16653                throw std::runtime_error("pd_test_ndframe_transpose failed: transpose() size");
16654            }
16655
16656            std::cout << " -> tests passed" << std::endl;
16657        }
unstack (pd_test_3_all.cpp:1739)
1729    }
1730    if (s.size() != 3) {
1731        std::cout << "  [FAIL] : in pd_test_3_all_chainable_mutators() : Case H size" << std::endl;
1732        throw std::runtime_error("pd_test_3_all_chainable_mutators failed: Case H size");
1733    }
1734
1735    std::cout << " -> tests passed" << std::endl;
1736}
1737
1738void pd_test_3_all_dataframe_unstack() {
1739    std::cout << "========= DataFrame.unstack() ========================";
1740
1741    std::map<std::string, std::vector<double>> data = {
1742        {"A", {1.0, 2.0, 3.0}},
1743        {"B", {4.0, 5.0, 6.0}}
1744    };
1745    pandas::DataFrame df(data);
1746
1747    // Without MultiIndex, unstack() returns self (matches pandas behavior)
1748    pandas::DataFrame result = df.unstack();
align (pd_test_1_all.cpp:14035)
14025            if (!approx_equal(a_s1, 2.0) || !approx_equal(a_o1, 99.0)) {
14026                passed = false;
14027                std::cout << "  [FAIL] : in pd_test_joining_compare() : difference at row 1 not shown" << std::endl;
14028                throw std::runtime_error("pd_test_joining_compare failed: diff values");
14029            }
14030
14031            std::cout << " -> tests passed" << std::endl;
14032        }
14033
14034        // =====================================================================
14035        // align() Tests
14036        // =====================================================================
14037
14038        void pd_test_joining_align() {
14039            std::cout << "========= align =======================================";
14040
14041            std::map<std::string, std::vector<double>> left_data = {
14042                {"A", {1.0, 2.0}}
14043            };
14044            std::vector<std::string> left_idx = {"x", "y"};
14045            pandas::DataFrame left(left_data, std::make_unique<pandas::Index<std::string>>(left_idx));
aligned_binary_op (pd_test_3_all.cpp:26690)
26680void pd_test_aligned_arith_same_index() {
26681    std::cout << "  same index add ... ";
26682    ::pandas::Series<::numpy::float64> s1({1.0, 2.0, 3.0}, std::string("val"));
26683    s1.set_index(std::make_unique<::pandas::Index<std::string>>(
26684        std::vector<std::string>{"a", "b", "c"}));
26685    ::pandas::Series<::numpy::float64> s2({10.0, 20.0, 30.0}, std::string("val"));
26686    s2.set_index(std::make_unique<::pandas::Index<std::string>>(
26687        std::vector<std::string>{"a", "b", "c"}));
26688
26689    auto result = s1.aligned_binary_op(s2, [](double a, double b) { return a + b; });
26690    check(result.size() == 3, "size == 3");
26691    check(result[0] == 11.0, "a -> 11");
26692    check(result[1] == 22.0, "b -> 22");
26693    check(result[2] == 33.0, "c -> 33");
26694    check(result.name() == "val", "name preserved");
26695    std::cout << "test passed" << std::endl;
26696}
26697
26698void pd_test_aligned_arith_different_index() {
26699    std::cout << "  different index add ... ";
aligned_binary_op_cross (pd_test_3_all.cpp:26821)
26811void pd_test_aligned_arith_cross_type() {
26812    std::cout << "  cross-type alignment (int64 x float64) ... ";
26813    ::pandas::Series<::numpy::int64> s1({1, 2, 3}, std::string("v"));
26814    s1.set_index(std::make_unique<::pandas::Index<std::string>>(
26815        std::vector<std::string>{"a", "b", "c"}));
26816    ::pandas::Series<::numpy::float64> s2({10.5, 20.5, 30.5}, std::string("v"));
26817    s2.set_index(std::make_unique<::pandas::Index<std::string>>(
26818        std::vector<std::string>{"b", "c", "d"}));
26819
26820    auto result = s1.aligned_binary_op_cross(s2, [](double a, double b) { return a + b; });
26821    check(result.size() == 4, "size == 4");
26822    check(std::isnan(result[0]), "a -> NaN");
26823    check(result[1] == 12.5, "b -> 12.5");
26824    check(result[2] == 23.5, "c -> 23.5");
26825    check(std::isnan(result[3]), "d -> NaN");
26826    std::cout << "test passed" << std::endl;
26827}
26828
26829void pd_test_aligned_arith_dtype_coercion() {
26830    // Tests dtype coercion rules matching pandas 2.x behavior for all 5-type combos.
combine (pd_test_2_all.cpp:1700)
1690        std::cout << "====================================== [OK] pd_test_between_time test suite ========================== " << std::endl;
1691        return 0;
1692    }
1693
1694} // namespace dataframe_tests
1695// ------------------- pd_test_between_time.cpp (end) -----------------------------
1696
1697// ------------------- pd_test_combine.cpp (start) -----------------------------
1698// dataframe_tests/pd_test_combine.cpp
1699// Test for DataFrame.combine() - column-wise combine with another DataFrame
1700
1701#include <iostream>
1702#include <cmath>
1703#include <stdexcept>
1704#include "../pandas/pd_dataframe.h"
1705
1706// CRITICAL: No using namespace directives
1707
1708namespace dataframe_tests {
1709    namespace dataframe_tests_combine {
combine_first (pd_test_1_all.cpp:13889)
13879            if (!approx_equal(b1, 10.0) || !approx_equal(b2, 20.0)) {
13880                passed = false;
13881                std::cout << "  [FAIL] : in pd_test_joining_join_index() : matched rows wrong" << std::endl;
13882                throw std::runtime_error("pd_test_joining_join_index failed: match values");
13883            }
13884
13885            std::cout << " -> tests passed" << std::endl;
13886        }
13887
13888        // =====================================================================
13889        // combine_first() Tests
13890        // =====================================================================
13891
13892        void pd_test_joining_combine_first() {
13893            std::cout << "========= combine_first ===============================";
13894
13895            std::map<std::string, std::vector<double>> left_data = {
13896                {"A", {1.0, std::nan(""), 3.0}},
13897                {"B", {std::nan(""), 5.0, std::nan("")}}
13898            };
13899            std::vector<std::string> left_idx = {"x", "y", "z"};
concat (pd_test_1_all.cpp:17717)
17707}
17708
17709void pd_test_period_index_concat() {
17710    std::cout << "========= concat factory ==============================";
17711
17712    std::vector<int64_t> ordinals1 = {0, 1};
17713    std::vector<int64_t> ordinals2 = {2, 3};
17714    pandas::PeriodIndex idx1(ordinals1, "D");
17715    pandas::PeriodIndex idx2(ordinals2, "D");
17716
17717    pandas::PeriodIndex concatenated = pandas::PeriodIndex::concat({idx1, idx2});
17718
17719    bool passed = (concatenated.size() == 4);
17720    if (!passed) {
17721        std::cout << "  [FAIL] : in pd_test_period_index_concat()" << std::endl;
17722        throw std::runtime_error("pd_test_period_index_concat failed");
17723    }
17724
17725    std::cout << " -> tests passed" << std::endl;
17726}
asfreq (pd_test_1_all.cpp:2869)
2859        std::cout << "========= PeriodArray: asfreq ======================= ";
2860
2861        // Monthly to quarterly
2862        pandas::PeriodArray arr_m(std::vector<std::string>{
2863            "2024-01",
2864            "2024-04",
2865            "2024-07",
2866            "NaT"
2867        }, "M");
2868
2869        auto arr_q = arr_m.asfreq("Q");
2870        if (arr_q.size() != 4) {
2871            std::cout << "  [FAIL] : asfreq size should be 4" << std::endl;
2872            throw std::runtime_error("pd_test_period_array_asfreq failed: size");
2873        }
2874        if (arr_q.freqstr() != "Q") {
2875            std::cout << "  [FAIL] : asfreq freqstr should be 'Q'" << std::endl;
2876            throw std::runtime_error("pd_test_period_array_asfreq failed: freqstr");
2877        }
2878
2879        // Check NaT is preserved
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
asof (pd_test_2_all.cpp:366)
356        std::cout << "====================================== [OK] pd_test_add_prefix test suite ========================== " << std::endl;
357        return 0;
358    }
359
360} // namespace dataframe_tests
361// ------------------- pd_test_add_prefix.cpp (end) -----------------------------
362
363// ------------------- pd_test_asof.cpp (start) -----------------------------
364// dataframe_tests/pd_test_asof.cpp
365// Test for DataFrame.asof() method
366
367#include <iostream>
368#include <cmath>
369#include <stdexcept>
370#include <limits>
371#include "../pandas/pd_dataframe.h"
372
373// CRITICAL: No using namespace directives
374
375namespace dataframe_tests {
between_time (pd_test_2_all.cpp:1154)
1144        std::cout << "====================================== [OK] pd_test_at_time test suite ========================== " << std::endl;
1145        return 0;
1146    }
1147
1148} // namespace dataframe_tests
1149// ------------------- pd_test_at_time.cpp (end) -----------------------------
1150
1151// ------------------- pd_test_between_time.cpp (start) -----------------------------
1152// dataframe_tests/pd_test_between_time.cpp
1153// Tests for DataFrame.between_time() method (pandas 2.0+ API)
1154// Selects values between particular times of day from datetime-indexed DataFrame
1155#include <iostream>
1156#include <stdexcept>
1157#include <vector>
1158#include <string>
1159#include <map>
1160#include "../pandas/pd_dataframe.h"
1161
1162// CRITICAL: No using namespace directives
diff (pd_test_1_all.cpp:5171)
5161        }
5162
5163        void pd_test_arithmetic_dataframe_diff_shift() {
5164            std::cout << "========= DataFrame diff/shift ==================";
5165
5166            std::map<std::string, std::vector<double>> data;
5167            data["A"] = {1.0, 3.0, 6.0, 10.0};
5168            pandas::DataFrame df(data);
5169
5170            // diff: [NaN, 2, 3, 4]
5171            auto d = df.diff();
5172            std::string val = d["A"].get_value_str(1);
5173            bool passed = std::abs(std::stod(val) - 2.0) < 0.001;
5174            if (!passed) {
5175                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff failed" << std::endl;
5176                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff failed");
5177            }
5178
5179            // First element should be NaN
5180            val = d["A"].get_value_str(0);
5181            passed = std::isnan(std::stod(val));
pct_change (pd_test_1_all.cpp:4621)
4611                throw std::runtime_error("pd_test_aggregation_series_skew_kurt failed: kurtosis alias failed");
4612            }
4613
4614            std::cout << " -> tests passed" << std::endl;
4615        }
4616
4617        void pd_test_aggregation_series_pct_change() {
4618            std::cout << "========= Series pct_change =====================";
4619
4620            pandas::Series<double> s({100.0, 110.0, 121.0});
4621            auto pct = s.pct_change();
4622
4623            // First element should be NaN
4624            bool passed = std::isnan(pct[0]);
4625            if (!passed) {
4626                std::cout << "  [FAIL] : in pd_test_aggregation_series_pct_change() : first element should be NaN" << std::endl;
4627                throw std::runtime_error("pd_test_aggregation_series_pct_change failed: first element should be NaN");
4628            }
4629
4630            // Second element should be 0.1 (10% increase)
4631            passed = std::abs(pct[1] - 0.1) < 0.001;
shift (pd_test_1_all.cpp:5188)
5178            // First element should be NaN
5179            val = d["A"].get_value_str(0);
5180            passed = std::isnan(std::stod(val));
5181            if (!passed) {
5182                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : diff NaN failed" << std::endl;
5183                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: diff NaN failed");
5184            }
5185
5186            // shift: [NaN, 1, 3, 6]
5187            auto s = df.shift();
5188            val = s["A"].get_value_str(1);
5189            passed = std::abs(std::stod(val) - 1.0) < 0.001;
5190            if (!passed) {
5191                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_diff_shift() : shift failed" << std::endl;
5192                throw std::runtime_error("pd_test_arithmetic_dataframe_diff_shift failed: shift failed");
5193            }
5194
5195            std::cout << " -> tests passed" << std::endl;
5196        }
to_period (pd_test_2_all.cpp:14554)
14544        std::cout << "====================================== [OK] pd_test_to_parquet test suite ========================" << std::endl;
14545        return 0;
14546    }
14547
14548} // namespace dataframe_tests
14549// ------------------- pd_test_to_parquet.cpp (end) -----------------------------
14550
14551// ------------------- pd_test_to_period.cpp (start) -----------------------------
14552// dataframe_tests/pd_test_to_period.cpp
14553// Test suite for DataFrame.to_period() method
14554
14555#include <iostream>
14556#include <stdexcept>
14557#include <vector>
14558#include <string>
14559#include <map>
14560
14561#include "../pandas/pd_dataframe.h"
14562
14563// CRITICAL: No using namespace directives
to_timestamp (pd_test_1_all.cpp:2830)
2820    void pd_test_period_array_to_timestamp() {
2821        std::cout << "========= PeriodArray: to_timestamp ======================= ";
2822
2823        pandas::PeriodArray arr(std::vector<std::string>{
2824            "2024-01",
2825            "2024-06",
2826            "NaT"
2827        }, "M");
2828
2829        // to_timestamp with start
2830        auto ts_start = arr.to_timestamp("start");
2831        if (ts_start.size() != 3) {
2832            std::cout << "  [FAIL] : to_timestamp size should be 3" << std::endl;
2833            throw std::runtime_error("pd_test_period_array_to_timestamp failed: size");
2834        }
2835
2836        auto ts0 = ts_start[0];
2837        if (!ts0.has_value()) {
2838            std::cout << "  [FAIL] : ts_start[0] should have value" << std::endl;
2839            throw std::runtime_error("pd_test_period_array_to_timestamp failed: ts_start[0]");
2840        }
tz_convert (pd_test_2_all.cpp:17874)
17864        std::cout << "====================================== [OK] pd_test_transform test suite ========================== " << std::endl;
17865        return 0;
17866    }
17867
17868} // namespace dataframe_tests
17869// ------------------- pd_test_transform.cpp (end) -----------------------------
17870
17871// ------------------- pd_test_tz_convert.cpp (start) -----------------------------
17872// dataframe_tests/pd_test_tz_convert.cpp
17873// Test for DataFrame.tz_convert() method
17874
17875#include <iostream>
17876#include <stdexcept>
17877#include <cmath>
17878#include "../pandas/pd_dataframe.h"
17879
17880namespace dataframe_tests {
17881    namespace dataframe_tests_tz_convert {
17882
17883        void pd_test_tz_convert_basic() {
tz_localize (pd_test_1_all.cpp:1431)
1421            "2023-06-15"
1422        });
1423
1424        // Initially should be timezone-naive
1425        if (arr.is_tz_aware()) {
1426            std::cout << "  [FAIL] : array should be timezone-naive initially" << std::endl;
1427            throw std::runtime_error("pd_test_datetime_array_timezone failed: naive");
1428        }
1429
1430        // Localize to UTC
1431        auto localized = arr.tz_localize("UTC");
1432        if (!localized.is_tz_aware()) {
1433            std::cout << "  [FAIL] : localized array should be timezone-aware" << std::endl;
1434            throw std::runtime_error("pd_test_datetime_array_timezone failed: localize");
1435        }
1436
1437        // Verify timezone name in dtype
1438        auto dt = localized.dtype();
1439        if (!dt.is_tz_aware()) {
1440            std::cout << "  [FAIL] : dtype should be timezone-aware" << std::endl;
1441            throw std::runtime_error("pd_test_datetime_array_timezone failed: dtype tz");
to_clipboard (pd_test_2_all.cpp:10176)
10166        std::cout << "====================================== [OK] pd_test_swaplevel test suite ========================== " << std::endl;
10167        return 0;
10168    }
10169
10170} // namespace dataframe_tests
10171// ------------------- pd_test_swaplevel.cpp (end) -----------------------------
10172
10173// ------------------- pd_test_to_clipboard.cpp (start) -----------------------------
10174// pd_test_to_clipboard.cpp
10175// Tests for DataFrame.to_clipboard() method
10176
10177#include <iostream>
10178#include <string>
10179#include <vector>
10180#include <map>
10181#include <sstream>
10182#include <stdexcept>
10183#include <limits>
10184
10185#include "../pandas/pd_dataframe.h"
to_csv (pd_test_1_all.cpp:6967)
6957        void pd_test_dataframe_io() {
6958            std::cout << "========= I/O methods ======================";
6959
6960            std::map<std::string, std::vector<numpy::int64>> data;
6961            data["A"] = {1, 2, 3};
6962            data["B"] = {4, 5, 6};
6963
6964            pandas::DataFrame df(data);
6965
6966            // Test to_csv
6967            std::string csv = df.to_csv(false);
6968            if (csv.empty()) {
6969                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970                throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971            }
6972            if (csv.find("A") == std::string::npos) {
6973                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974                throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975            }
6976
6977            // Test to_json
to_dict (pd_test_1_all.cpp:13343)
13333        void pd_test_io_to_dict() {
13334            std::cout << "========= to_dict ================================";
13335
13336            std::map<std::string, std::vector<double>> data;
13337            data["A"] = {1.0, 2.0, 3.0};
13338            data["B"] = {4.0, 5.0, 6.0};
13339
13340            pandas::DataFrame df(data);
13341
13342            // Test list orientation
13343            auto dict_list = df.to_dict("list");
13344
13345            bool passed = (dict_list.count("A") > 0 && dict_list.count("B") > 0);
13346            passed = passed && (dict_list["A"].size() == 3);
13347            passed = passed && (dict_list["B"].size() == 3);
13348
13349            if (!passed) {
13350                std::cout << "  [FAIL] : in pd_test_io_to_dict() : to_dict list failed" << std::endl;
13351                throw std::runtime_error("pd_test_io_to_dict failed");
13352            }
to_excel (pd_test_2_all.cpp:3427)
3417        std::cout << "====================================== [OK] pd_test_eval test suite ========================== " << std::endl;
3418        return 0;
3419    }
3420
3421} // namespace dataframe_tests
3422// ------------------- pd_test_eval.cpp (end) -----------------------------
3423
3424// ------------------- pd_test_excel.cpp (start) -----------------------------
3425// dataframe_tests/pd_test_excel.cpp
3426// Test file for DataFrame.to_excel() method
3427
3428#include <iostream>
3429#include <fstream>
3430#include <stdexcept>
3431#include <map>
3432#include <vector>
3433#include <string>
3434#include <cmath>
3435#include <limits>
to_feather (pd_test_2_all.cpp:11158)
11148        std::cout << "====================================== [OK] pd_test_to_dict test suite ==========================" << std::endl;
11149        return 0;
11150    }
11151
11152} // namespace dataframe_tests
11153// ------------------- pd_test_to_dict.cpp (end) -----------------------------
11154
11155// ------------------- pd_test_to_feather.cpp (start) -----------------------------
11156// dataframe_tests/pd_test_to_feather.cpp
11157// Comprehensive tests for DataFrame.to_feather() method (pandas-compatible)
11158
11159#include <iostream>
11160#include <fstream>
11161#include <sstream>
11162#include <stdexcept>
11163#include <vector>
11164#include <map>
11165#include <string>
11166#include <cmath>
11167#include <limits>
to_hdf (pd_test_2_all.cpp:11613)
11603        std::cout << "====================================== [OK] pd_test_to_feather test suite ==========================" << std::endl;
11604        return 0;
11605    }
11606
11607} // namespace dataframe_tests
11608// ------------------- pd_test_to_feather.cpp (end) -----------------------------
11609
11610// ------------------- pd_test_to_hdf.cpp (start) -----------------------------
11611// dataframe_tests/pd_test_to_hdf.cpp
11612// Tests for DataFrame.to_hdf() method - Updated for real HDF5 format
11613
11614#include <iostream>
11615#include <stdexcept>
11616#include <map>
11617#include <vector>
11618#include <string>
11619#include <fstream>
11620#include <cstdio>
11621
11622#include "../pandas/pd_dataframe.h"
to_json (pd_test_1_all.cpp:6978)
6968            if (csv.empty()) {
6969                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv empty" << std::endl;
6970                throw std::runtime_error("pd_test_dataframe_io failed: to_csv empty");
6971            }
6972            if (csv.find("A") == std::string::npos) {
6973                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_csv missing column name" << std::endl;
6974                throw std::runtime_error("pd_test_dataframe_io failed: to_csv missing column name");
6975            }
6976
6977            // Test to_json
6978            std::string json = df.to_json("columns");
6979            if (json.empty()) {
6980                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_json empty" << std::endl;
6981                throw std::runtime_error("pd_test_dataframe_io failed: to_json empty");
6982            }
6983            if (json.find("{") == std::string::npos) {
6984                std::cout << "  [FAIL] : in pd_test_dataframe_io() : to_json not JSON" << std::endl;
6985                throw std::runtime_error("pd_test_dataframe_io failed: to_json not JSON");
6986            }
6987
6988            // Test to_string
to_latex (pd_test_2_all.cpp:9446)
9436        void pd_test_styler_to_latex() {
9437            std::cout << "========= to_latex =================================";
9438
9439            std::map<std::string, std::vector<double>> data = {
9440                {"A", {1.0, 2.0, 3.0}},
9441                {"B", {4.0, 5.0, 6.0}}
9442            };
9443            pandas::DataFrame df(data);
9444
9445            auto styler = df.style();
9446            std::string latex = styler.to_latex();
9447
9448            if (!contains(latex, "\\begin{tabular}")) {
9449                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450                throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451            }
9452            if (!contains(latex, "\\end{tabular}")) {
9453                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454                throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455            }
to_latex (pd_test_2_all.cpp:9446)
9436        void pd_test_styler_to_latex() {
9437            std::cout << "========= to_latex =================================";
9438
9439            std::map<std::string, std::vector<double>> data = {
9440                {"A", {1.0, 2.0, 3.0}},
9441                {"B", {4.0, 5.0, 6.0}}
9442            };
9443            pandas::DataFrame df(data);
9444
9445            auto styler = df.style();
9446            std::string latex = styler.to_latex();
9447
9448            if (!contains(latex, "\\begin{tabular}")) {
9449                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450                throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451            }
9452            if (!contains(latex, "\\end{tabular}")) {
9453                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454                throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455            }
to_latex (pd_test_2_all.cpp:9446)
9436        void pd_test_styler_to_latex() {
9437            std::cout << "========= to_latex =================================";
9438
9439            std::map<std::string, std::vector<double>> data = {
9440                {"A", {1.0, 2.0, 3.0}},
9441                {"B", {4.0, 5.0, 6.0}}
9442            };
9443            pandas::DataFrame df(data);
9444
9445            auto styler = df.style();
9446            std::string latex = styler.to_latex();
9447
9448            if (!contains(latex, "\\begin{tabular}")) {
9449                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not produce tabular environment" << std::endl;
9450                throw std::runtime_error("pd_test_styler_to_latex failed: did not produce tabular environment");
9451            }
9452            if (!contains(latex, "\\end{tabular}")) {
9453                std::cout << "  [FAIL] : in pd_test_styler_to_latex() : did not close tabular environment" << std::endl;
9454                throw std::runtime_error("pd_test_styler_to_latex failed: did not close tabular environment");
9455            }
to_list (pd_test_1_all.cpp:10247)
10237    std::cout << " -> tests passed" << std::endl;
10238}
10239
10240void pd_test_extension_index_to_list() {
10241    std::cout << "========= to_list =========================";
10242
10243    pandas::CategoricalArray arr({"x", "y", "z"});
10244    pandas::CategoricalIndex idx(arr);
10245
10246    auto list = idx.to_list();
10247
10248    bool passed = (list.size() == 3 &&
10249                   list[0].has_value() && *list[0] == "x" &&
10250                   list[1].has_value() && *list[1] == "y" &&
10251                   list[2].has_value() && *list[2] == "z");
10252    if (!passed) {
10253        std::cout << "  [FAIL] : in pd_test_extension_index_to_list() : to_list check failed" << std::endl;
10254        throw std::runtime_error("pd_test_extension_index_to_list failed");
10255    }
to_markdown (pd_test_1_all.cpp:13466)
13456        void pd_test_io_to_markdown() {
13457            std::cout << "========= to_markdown ============================";
13458
13459            std::map<std::string, std::vector<double>> data;
13460            data["X"] = {10.0, 20.0};
13461            data["Y"] = {30.0, 40.0};
13462
13463            pandas::DataFrame df(data);
13464
13465            std::string md = df.to_markdown();
13466
13467            // Check for markdown table elements
13468            bool has_pipe = (md.find("|") != std::string::npos);
13469            bool has_separator = (md.find("---") != std::string::npos);
13470
13471            bool passed = has_pipe && has_separator;
13472
13473            if (!passed) {
13474                std::cout << "  [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475                throw std::runtime_error("pd_test_io_to_markdown failed");
to_markdown (pd_test_1_all.cpp:13466)
13456        void pd_test_io_to_markdown() {
13457            std::cout << "========= to_markdown ============================";
13458
13459            std::map<std::string, std::vector<double>> data;
13460            data["X"] = {10.0, 20.0};
13461            data["Y"] = {30.0, 40.0};
13462
13463            pandas::DataFrame df(data);
13464
13465            std::string md = df.to_markdown();
13466
13467            // Check for markdown table elements
13468            bool has_pipe = (md.find("|") != std::string::npos);
13469            bool has_separator = (md.find("---") != std::string::npos);
13470
13471            bool passed = has_pipe && has_separator;
13472
13473            if (!passed) {
13474                std::cout << "  [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475                throw std::runtime_error("pd_test_io_to_markdown failed");
to_markdown (pd_test_1_all.cpp:13466)
13456        void pd_test_io_to_markdown() {
13457            std::cout << "========= to_markdown ============================";
13458
13459            std::map<std::string, std::vector<double>> data;
13460            data["X"] = {10.0, 20.0};
13461            data["Y"] = {30.0, 40.0};
13462
13463            pandas::DataFrame df(data);
13464
13465            std::string md = df.to_markdown();
13466
13467            // Check for markdown table elements
13468            bool has_pipe = (md.find("|") != std::string::npos);
13469            bool has_separator = (md.find("---") != std::string::npos);
13470
13471            bool passed = has_pipe && has_separator;
13472
13473            if (!passed) {
13474                std::cout << "  [FAIL] : in pd_test_io_to_markdown() : invalid markdown format" << std::endl;
13475                throw std::runtime_error("pd_test_io_to_markdown failed");
to_numpy (pd_test_1_all.cpp:16764)
16754        // =====================================================================
16755        // to_numpy Tests
16756        // =====================================================================
16757
16758        void pd_test_ndframe_to_numpy() {
16759            std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761            pandas::Series<int> s({10, 20, 30});
16762
16763            auto arr = s.to_numpy();
16764
16765            bool passed = arr.getSize() == 3;
16766            if (!passed) {
16767                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769            }
16770
16771            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772            if (!passed) {
16773                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_numpy (pd_test_1_all.cpp:16764)
16754        // =====================================================================
16755        // to_numpy Tests
16756        // =====================================================================
16757
16758        void pd_test_ndframe_to_numpy() {
16759            std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761            pandas::Series<int> s({10, 20, 30});
16762
16763            auto arr = s.to_numpy();
16764
16765            bool passed = arr.getSize() == 3;
16766            if (!passed) {
16767                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769            }
16770
16771            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772            if (!passed) {
16773                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_numpy (pd_test_1_all.cpp:16764)
16754        // =====================================================================
16755        // to_numpy Tests
16756        // =====================================================================
16757
16758        void pd_test_ndframe_to_numpy() {
16759            std::cout << "========= to_numpy =============================================" << std::endl;
16760
16761            pandas::Series<int> s({10, 20, 30});
16762
16763            auto arr = s.to_numpy();
16764
16765            bool passed = arr.getSize() == 3;
16766            if (!passed) {
16767                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : size" << std::endl;
16768                throw std::runtime_error("pd_test_ndframe_to_numpy failed: size");
16769            }
16770
16771            passed = arr.getElementAt({0}) == 10 && arr.getElementAt({1}) == 20 && arr.getElementAt({2}) == 30;
16772            if (!passed) {
16773                std::cout << "  [FAIL] : in pd_test_ndframe_to_numpy() : values" << std::endl;
to_orc (pd_test_2_all.cpp:13769)
13759        std::cout << "====================================== [OK] pd_test_to_markdown test suite ========================== " << std::endl;
13760        return 0;
13761    }
13762
13763} // namespace dataframe_tests
13764// ------------------- pd_test_to_markdown.cpp (end) -----------------------------
13765
13766// ------------------- pd_test_to_orc.cpp (start) -----------------------------
13767// dataframe_tests/pd_test_to_orc.cpp
13768// Tests for DataFrame.to_orc() method
13769
13770#include <iostream>
13771#include <stdexcept>
13772#include <map>
13773#include <vector>
13774#include <string>
13775#include <fstream>
13776#include <cstdio>
13777
13778#include "../pandas/pd_dataframe.h"
to_parquet (pd_test_2_all.cpp:14117)
14107        std::cout << "====================================== [OK] pd_test_to_orc test suite ==========================" << std::endl;
14108        return 0;
14109    }
14110
14111} // namespace dataframe_tests
14112// ------------------- pd_test_to_orc.cpp (end) -----------------------------
14113
14114// ------------------- pd_test_to_parquet.cpp (start) -----------------------------
14115// dataframe_tests/pd_test_to_parquet.cpp
14116// Tests for DataFrame.to_parquet() method
14117
14118#include <iostream>
14119#include <stdexcept>
14120#include <map>
14121#include <vector>
14122#include <string>
14123#include <fstream>
14124#include <cstdio>
14125
14126#include "../pandas/pd_dataframe.h"
to_pickle (pd_test_2_all.cpp:14906)
14896        std::cout << "====================================== [OK] pd_test_to_period test suite ========================== " << std::endl;
14897        return 0;
14898    }
14899
14900} // namespace dataframe_tests
14901// ------------------- pd_test_to_period.cpp (end) -----------------------------
14902
14903// ------------------- pd_test_to_pickle.cpp (start) -----------------------------
14904// dataframe_tests/pd_test_to_pickle.cpp
14905// Tests for DataFrame.to_pickle() method
14906
14907#include <iostream>
14908#include <stdexcept>
14909#include <map>
14910#include <vector>
14911#include <string>
14912#include <fstream>
14913#include <cstdio>
14914
14915#include "../pandas/pd_dataframe.h"
to_sql (pd_test_2_all.cpp:15576)
15566        std::cout << "====================================== [OK] pd_test_to_records test suite ========================== " << std::endl;
15567        return 0;
15568    }
15569
15570} // namespace dataframe_tests
15571// ------------------- pd_test_to_records.cpp (end) -----------------------------
15572
15573// ------------------- pd_test_to_sql.cpp (start) -----------------------------
15574// dataframe_tests/pd_test_to_sql.cpp
15575// Tests for DataFrame.to_sql() method
15576
15577#include <iostream>
15578#include <stdexcept>
15579#include <map>
15580#include <vector>
15581#include <string>
15582#include <fstream>
15583#include <sstream>
15584#include <cstdio>
to_stata (pd_test_2_all.cpp:16133)
16123        std::cout << "====================================== [OK] pd_test_to_sql test suite ==========================" << std::endl;
16124        return 0;
16125    }
16126
16127} // namespace dataframe_tests
16128// ------------------- pd_test_to_sql.cpp (end) -----------------------------
16129
16130// ------------------- pd_test_to_stata.cpp (start) -----------------------------
16131// pd_test_to_stata.cpp
16132// Tests for DataFrame.to_stata() method
16133
16134#include <iostream>
16135#include <fstream>
16136#include <string>
16137#include <vector>
16138#include <map>
16139#include <cstring>
16140#include <stdexcept>
16141
16142#include "../pandas/pd_dataframe.h"
to_string (pd_test_1_all.cpp:2693)
2683        pandas::PeriodArray arr_m(std::vector<std::string>{
2684            "2020-01",
2685            "NaT",
2686            "2025-06"
2687        }, "M");
2688
2689        // Year
2690        auto years = arr_m.year();
2691        auto y0 = years[0];
2692        if (!y0.has_value() || y0.value() != 2020) {
2693            std::cout << "  [FAIL] : year[0] should be 2020, got " << (y0.has_value() ? std::to_string(y0.value()) : "NA") << std::endl;
2694            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[0]");
2695        }
2696
2697        auto y1 = years[1];
2698        if (y1.has_value()) {
2699            std::cout << "  [FAIL] : year[1] should be NA (NaT)" << std::endl;
2700            throw std::runtime_error("pd_test_period_array_year_month_quarter failed: year[1] should be NA");
2701        }
2702
2703        auto y2 = years[2];
to_string_full (pd_test_5_all.cpp:64951)
64941static pandas::Series<numpy::float64> make_s(
64942    const std::vector<double>& v,
64943    const std::optional<std::string>& name = std::nullopt) {
64944    return pandas::Series<numpy::float64>(v, name);
64945}
64946
64947static const double PINF = std::numeric_limits<double>::infinity();
64948static const double NINF = -std::numeric_limits<double>::infinity();
64949static const double DNAN = std::numeric_limits<double>::quiet_NaN();
64950
64951// Convenience: call to_string_full() with pandas defaults (index=true,
64952// length=false, dtype=true, name=true). We strip the trailer here too so we
64953// match `pd.Series(...).to_string()`'s captured output, which OMITS the dtype
64954// trailer (Python `Series.to_string()` defaults `dtype=False`).
64955static std::string call_to_string_full(
64956    const pandas::Series<numpy::float64>& s) {
64957    std::string raw = s.to_string_full(
64958        /*buf=*/nullptr,
64959        /*na_rep=*/"NaN",
64960        /*float_format=*/"",
64961        /*header=*/false,
to_string_vector (pd_test_1_all.cpp:10871)
10861    std::cout << " -> tests passed" << std::endl;
10862}
10863
10864void pd_test_extension_index_to_string_vector() {
10865    std::cout << "========= to_string_vector =========================";
10866
10867    pandas::CategoricalArray arr({"a", std::nullopt, "c"});
10868    pandas::CategoricalIndex idx(arr);
10869
10870    auto str_vec = idx.to_string_vector();
10871
10872    bool passed = (str_vec.size() == 3 &&
10873                   str_vec[0] == "a" && str_vec[1] == "NA" && str_vec[2] == "c");
10874    if (!passed) {
10875        std::cout << "  [FAIL] : in pd_test_extension_index_to_string_vector() : to_string_vector check failed" << std::endl;
10876        throw std::runtime_error("pd_test_extension_index_to_string_vector failed");
10877    }
10878
10879    std::cout << " -> tests passed" << std::endl;
10880}
to_xarray (pd_test_2_all.cpp:16928)
16918        std::cout << "====================================== [OK] pd_test_to_timestamp test suite ========================== " << std::endl;
16919        return 0;
16920    }
16921
16922} // namespace dataframe_tests
16923// ------------------- pd_test_to_timestamp.cpp (end) -----------------------------
16924
16925// ------------------- pd_test_to_xarray.cpp (start) -----------------------------
16926// dataframe_tests/pd_test_to_xarray.cpp
16927// Test for DataFrame.to_xarray() - Convert DataFrame to xarray Dataset
16928
16929#include <iostream>
16930#include <stdexcept>
16931#include <string>
16932#include <vector>
16933#include <map>
16934#include <memory>
16935#include <cmath>
16936
16937#include "../pandas/pd_dataframe.h"
tolist (pd_test_3_all.cpp:2300)
2290        threw = true;
2291    }
2292    if (!threw) {
2293        throw std::runtime_error("swapaxes should throw for invalid axes");
2294    }
2295
2296    std::cout << " -> tests passed" << std::endl;
2297}
2298
2299void pd_test_3_all_categorical_to_list() {
2300    std::cout << "========= CategoricalArray.to_list()/tolist() =========";
2301
2302    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "c"};
2303    pandas::CategoricalArray arr(values);
2304
2305    auto list = arr.to_list();
2306    if (list.size() != 4 || *list[0] != "a" || *list[1] != "b" ||
2307        list[2].has_value() || *list[3] != "c") {
2308        throw std::runtime_error("to_list failed");
2309    }
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype (pd_test_1_all.cpp:21292)
21282            std::cout << "========= astype all columns to float64 =============";
21283
21284            // Create DataFrame with int64 columns
21285            std::map<std::string, std::vector<numpy::int64>> data;
21286            data["A"] = {1, 2, 3, 4, 5};
21287            data["B"] = {10, 20, 30, 40, 50};
21288
21289            pandas::DataFrame df(data);
21290
21291            // Convert all columns to float64
21292            pandas::DataFrame df_float = df.astype("float64");
21293
21294            // Verify dtype changed
21295            pandas::Series<std::string> dtypes = df_float.dtypes();
21296
21297            bool passed = true;
21298            if (dtypes[static_cast<size_t>(0)] != "float64") {
21299                std::cout << "  [FAIL] : in pd_test_astype_all_columns_to_float64() : column A dtype is " << dtypes[static_cast<size_t>(0)] << ", expected float64" << std::endl;
21300                passed = false;
21301            }
21302            if (dtypes[static_cast<size_t>(1)] != "float64") {
astype_dtype (pd_test_5_all.cpp:43633)
43623        "0  a\n"
43624        "1  b\n"
43625        "2  c";
43626    check_case("dtype_extension_dt_complex_fallback_925116_case_6",
43627               df, actual, expected, "string", local_fail);
43628}
43629
43630void f_dtype_extension_dt_complex_fallback_925116_case_7_series_string_astype_string_drops_override(int& local_fail) {
43631    std::cout << "-- case_7_series_string_astype_string_drops_override\n";
43632    pandas::Series<std::string> s({"a", "b", "c"});
43633    auto r_box = s.astype_dtype("string");
43634    auto* r = dynamic_cast<pandas::Series<std::string>*>(r_box.get());
43635    if (r == nullptr) {
43636        pandas_tests::check(false, "case_7.astype_returned_non_string_series", local_fail);
43637        return;
43638    }
43639    pandas::DataFrame df = r->to_frame(std::optional<std::string>("v"));
43640    std::string actual = df.to_string();
43641
43642    std::cout << "    src_dtype=" << show_dtype(s)
43643              << " astype_result_dtype=" << show_dtype(*r) << "\n";
bool_ (pd_test_1_all.cpp:9120)
9110void pd_test_datetime_mixin_array_constructor() {
9111    std::cout << "========= DatetimeTDMixin array constructor =========================";
9112
9113    // Create DatetimeArray with some values
9114    numpy::NDArray<numpy::datetime64> data(std::vector<size_t>{3});
9115    data.setElementAt({0}, numpy::datetime64(1000000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2001
9116    data.setElementAt({1}, numpy::datetime64(1500000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2017
9117    data.setElementAt({2}, numpy::datetime64(1600000000000000000LL, numpy::DateTimeUnit::Nanosecond));  // ~2020
9118
9119    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{3});
9120    mask.setElementAt({0}, numpy::bool_(false));
9121    mask.setElementAt({1}, numpy::bool_(false));
9122    mask.setElementAt({2}, numpy::bool_(false));
9123
9124    pandas::DatetimeArray arr(data, mask);
9125    pandas::DatetimeTDMixin idx(arr, "timestamps");
9126
9127    bool passed = (idx.size() == 3 && !idx.empty() &&
9128                   idx.name().has_value() && *idx.name() == "timestamps" &&
9129                   idx.inferred_type() == "datetime");
9130    if (!passed) {
bool_vec_to_cond (pd_test_3_all.cpp:26108)
26098    auto result = s.where(cond, other);
26099    if (!result.name().has_value() || result.name().value() != "my_series") {
26100        throw std::runtime_error("Name not preserved");
26101    }
26102    std::cout << "PASSED" << std::endl;
26103}
26104
26105void pd_test_where_series_bool_vec() {
26106    std::cout << "  bool_vec_to_cond conversion... ";
26107    std::vector<bool> bvec = {true, false, true, false, true};
26108    auto cond_arr = ::pandas::Series<::numpy::float64>::bool_vec_to_cond(bvec);
26109    if (cond_arr.getSize() != 5) {
26110        throw std::runtime_error("Size mismatch");
26111    }
26112    for (size_t i = 0; i < 5; ++i) {
26113        if (cond_arr.getElementAt({i}) != bvec[i]) {
26114            throw std::runtime_error("Value mismatch at index " + std::to_string(i));
26115        }
26116    }
26117    std::cout << "PASSED" << std::endl;
26118}
convert_dtypes (pd_test_1_all.cpp:27317)
27307        void pd_test_convert_dtypes_integer_strings() {
27308            std::cout << "========= convert_dtypes: integer strings ============";
27309
27310            // Create DataFrame with string column containing integers
27311            std::map<std::string, std::vector<std::string>> data;
27312            data["a"] = {"1", "2", "3", "4", "5"};
27313
27314            pandas::DataFrame df(data);
27315
27316            // Convert dtypes
27317            pandas::DataFrame converted = df.convert_dtypes();
27318
27319            // After conversion, should be int64
27320            pandas::Series<std::string> dtypes_after = converted.dtypes();
27321            std::string dtype_a = dtypes_after[static_cast<size_t>(0)];
27322
27323            // Verify the dtype was converted to Int64 (nullable integer, per pandas convert_dtypes behavior)
27324            bool passed = (dtype_a == "Int64" || dtype_a == "int64");
27325            if (!passed) {
27326                std::cout << "  [FAIL] : in pd_test_convert_dtypes_integer_strings() : expected Int64, got " << dtype_a << std::endl;
27327                throw std::runtime_error("pd_test_convert_dtypes_integer_strings failed: dtype mismatch");
copy (pd_test_1_all.cpp:5798)
5788// ============================================================================
5789// Copy/Rename Tests
5790// ============================================================================
5791
5792void pd_test_categorical_index_copy() {
5793    std::cout << "========= copy ========================================";
5794
5795    pandas::CategoricalArray arr({"a", "b", "c"});
5796    pandas::CategoricalIndex idx(arr, "original");
5797
5798    pandas::CategoricalIndex copied = idx.copy();
5799
5800    bool passed = (copied.size() == idx.size() && copied.name() == idx.name() &&
5801                   copied.categories() == idx.categories() && copied.ordered() == idx.ordered());
5802    if (!passed) {
5803        std::cout << "  [FAIL] : in pd_test_categorical_index_copy()" << std::endl;
5804        throw std::runtime_error("pd_test_categorical_index_copy failed");
5805    }
5806
5807    std::cout << " -> tests passed" << std::endl;
5808}
copy_series_metadata_from (pd_test_3_all.cpp:27251)
27241// Test 9: copy_series_metadata_from
27242void pd_test_copy_series_metadata() {
27243    std::cout << "  -- pd_test_copy_series_metadata --" << std::endl;
27244    Series<numpy::float64> source({1.0, 2.0, 3.0}, std::string("src"));
27245    source.set_index(std::make_unique<Index<std::string>>(
27246        std::vector<std::string>{"a", "b", "c"}));
27247    source.set_dtype_override("int64");
27248
27249    Series<numpy::float64> target({10.0, 20.0, 30.0});
27250    target.copy_series_metadata_from(source);
27251    check(target.name() == "src", "name copied");
27252    check(target.index().size() == 3, "index copied");
27253    check(target.dtype_name() == "int64", "dtype_override copied");
27254}
27255
27256int pd_test_apply_inference_main() {
27257    std::cout << "====================================== pd_test_apply_inference ========================== " << std::endl;
27258    g_pass = 0;
27259    g_fail = 0;
27260    try {
infer_objects (pd_test_1_all.cpp:27595)
27585            // Create DataFrame with string column containing integers
27586            std::map<std::string, std::vector<std::string>> data;
27587            data["A"] = {"1", "2", "3", "4", "5"};
27588
27589            pandas::DataFrame df(data);
27590
27591            // Before inference, dtype should be string/object
27592            std::string before_dtype = df["A"].dtype_name();
27593
27594            // Apply infer_objects
27595            pandas::DataFrame result = df.infer_objects();
27596
27597            // After inference, dtype should be int64
27598            std::string after_dtype = result["A"].dtype_name();
27599
27600            bool passed = (after_dtype == "int64");
27601            if (!passed) {
27602                std::cout << "  [FAIL] : in pd_test_infer_objects_integer_column() : expected int64, got " << after_dtype << std::endl;
27603                throw std::runtime_error("pd_test_infer_objects_integer_column failed");
27604            }
view (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
items (pd_test_1_all.cpp:16554)
16545        // =====================================================================
16546        // Iteration Tests (items, keys)
16547        // =====================================================================
16548
16549        void pd_test_ndframe_items_keys() {
16550            std::cout << "========= items/keys ===========================================" << std::endl;
16551
16552            pandas::Series<int> s({10, 20, 30});
16553
16554            // Test items()
16555            std::vector<std::string> collected_keys;
16556            std::vector<int> collected_values;
16557
16558            s.items([&](const std::string& key, int value) {
16559                collected_keys.push_back(key);
16560                collected_values.push_back(value);
16561            });
16562
16563            bool passed = collected_keys.size() == 3;
16564            if (!passed) {
items (pd_test_1_all.cpp:16554)
16545        // =====================================================================
16546        // Iteration Tests (items, keys)
16547        // =====================================================================
16548
16549        void pd_test_ndframe_items_keys() {
16550            std::cout << "========= items/keys ===========================================" << std::endl;
16551
16552            pandas::Series<int> s({10, 20, 30});
16553
16554            // Test items()
16555            std::vector<std::string> collected_keys;
16556            std::vector<int> collected_values;
16557
16558            s.items([&](const std::string& key, int value) {
16559                collected_keys.push_back(key);
16560                collected_values.push_back(value);
16561            });
16562
16563            bool passed = collected_keys.size() == 3;
16564            if (!passed) {
keys (pd_test_1_all.cpp:16319)
16309            }
16310
16311            // Test default value
16312            passed = attrs.get<int>("missing", 99) == 99;
16313            if (!passed) {
16314                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : default value" << std::endl;
16315                throw std::runtime_error("pd_test_ndframe_attrs failed: default value");
16316            }
16317
16318            // Test keys
16319            auto keys = attrs.keys();
16320            passed = keys.size() == 3;
16321            if (!passed) {
16322                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : keys()" << std::endl;
16323                throw std::runtime_error("pd_test_ndframe_attrs failed: keys()");
16324            }
16325
16326            // Test remove
16327            passed = attrs.remove("count") && !attrs.contains("count");
16328            if (!passed) {
16329                std::cout << "  [FAIL] : in pd_test_ndframe_attrs() : remove" << std::endl;
duplicated (pd_test_1_all.cpp:10583)
10574    std::cout << " -> tests passed" << std::endl;
10575}
10576
10577void pd_test_extension_index_duplicated() {
10578    std::cout << "========= duplicated =========================";
10579
10580    pandas::CategoricalArray arr({"a", "b", "a", "c", "a"});
10581    pandas::CategoricalIndex idx(arr);
10582
10583    auto dup_mask = idx.duplicated("first");
10584
10585    bool passed = (dup_mask.getElementAt({0}) == false &&
10586                   dup_mask.getElementAt({1}) == false &&
10587                   dup_mask.getElementAt({2}) == true &&
10588                   dup_mask.getElementAt({3}) == false &&
10589                   dup_mask.getElementAt({4}) == true);
10590    if (!passed) {
10591        std::cout << "  [FAIL] : in pd_test_extension_index_duplicated() : duplicated check failed" << std::endl;
10592        throw std::runtime_error("pd_test_extension_index_duplicated failed");
10593    }
isin (pd_test_1_all.cpp:5938)
5928    std::cout << " -> tests passed" << std::endl;
5929}
5930
5931void pd_test_categorical_index_isin() {
5932    std::cout << "========= inherited isin ==============================";
5933
5934    pandas::CategoricalArray arr({"a", "b", "c", "d"});
5935    pandas::CategoricalIndex idx(arr);
5936
5937    std::vector<std::string> values = {"a", "c"};
5938    numpy::NDArray<numpy::bool_> mask = idx.isin(values);
5939
5940    bool passed = (mask.getSize() == 4 &&
5941                   mask.getElementAt({0}) == true &&   // a
5942                   mask.getElementAt({1}) == false &&  // b
5943                   mask.getElementAt({2}) == true &&   // c
5944                   mask.getElementAt({3}) == false);   // d
5945    if (!passed) {
5946        std::cout << "  [FAIL] : in pd_test_categorical_index_isin()" << std::endl;
5947        throw std::runtime_error("pd_test_categorical_index_isin failed");
5948    }
unique (pd_test_1_all.cpp:1345)
1335        pandas::DatetimeArray arr(std::vector<std::string>{
1336            "2023-01-01",
1337            "2023-06-15",
1338            "2023-01-01",
1339            "NaT",
1340            "2023-06-15",
1341            "NaT"
1342        });
1343
1344        // unique
1345        auto uniq = arr.unique();
1346        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1347        if (uniq.size() != 3) {
1348            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1349            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1350        }
1351
1352        // factorize
1353        auto [codes, uniques] = arr.factorize();
1354        // Codes for NaT should be -1
1355        if (codes.getElementAt({3}) != -1) {
is_na_at (pd_test_5_all.cpp:35205)
35195    pandas::DataFrame df;
35196    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35197    pandas_tests::check(df["X"].get_value_bool(0) == true,  "case_3.idx0_true",   local_fail);
35198    pandas_tests::check(df["X"].get_value_bool(1) == false, "case_3.idx1_NA_false", local_fail);
35199    pandas_tests::check(df["X"].get_value_bool(2) == false, "case_3.idx2_false",  local_fail);
35200}
35201
35202void bool_nullable_826495_case_4_is_na_at_mask_aware(int& local_fail) {
35203    pandas::DataFrame df;
35204    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35205    pandas_tests::check(df["X"].is_na_at(0) == false, "case_4.idx0_not_na", local_fail);
35206    pandas_tests::check(df["X"].is_na_at(1) == true,  "case_4.idx1_is_na",  local_fail);
35207    pandas_tests::check(df["X"].is_na_at(2) == false, "case_4.idx2_not_na", local_fail);
35208}
35209
35210void bool_nullable_826495_case_5_fillna_preserves_dtype(int& local_fail) {
35211    pandas::DataFrame df;
35212    df.add_column_nullable<bool>("X", {true, pandas::NA_BOOL, false});
35213    pandas_tests::check(df["X"].dtype_name() == "boolean", "case_5.pre_dtype", local_fail);
35214    auto df_filled = df.fillna(1.0);
35215    pandas_tests::check(df_filled["X"].dtype_name() == "boolean",
GlobalUnlock (pd_test_2_all.cpp:10220)
10210                return "";
10211            }
10212
10213            char* pszText = static_cast<char*>(GlobalLock(hData));
10214            if (pszText == nullptr) {
10215                CloseClipboard();
10216                return "";
10217            }
10218
10219            std::string text(pszText);
10220            GlobalUnlock(hData);
10221            CloseClipboard();
10222
10223            return text;
10224        }
10225#else
10226        std::string get_clipboard_text() {
10227            // Non-Windows: just return empty (can't easily read clipboard)
10228            return "";
10229        }
10230#endif
abs (pd_test_1_all.cpp:283)
273            std::optional<bool>(true)
274        });
275
276        auto s = arr.sum();
277        if (!s.has_value() || s.value() != 3) {
278            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : sum should be 3" << std::endl;
279            throw std::runtime_error("pd_test_boolean_array_reductions failed: sum");
280        }
281
282        auto m = arr.mean();
283        if (!m.has_value() || std::abs(m.value() - 0.75) > 0.001) {
284            std::cout << "  [FAIL] : in pd_test_boolean_array_reductions() : mean should be 0.75" << std::endl;
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
all_values_whole_number (pd_test_5_all.cpp:30090)
30080                !src_map_ov.empty() ? src_map_ov : src_ser_dt;
30081            bool is_int_like =
30082                (src_effective.find("int") != std::string::npos ||
30083                 src_effective.find("uint") != std::string::npos);
30084            bool comb_has_col = combined.has_column(flat);
30085            bool comb_hasnans = false, comb_allwhole = false;
30086            std::string comb_dt = "<missing>";
30087            if (comb_has_col) {
30088                const pandas::NDFrameBase& c = combined[flat];
30089                comb_hasnans = c.hasnans();
30090                comb_allwhole = c.all_values_whole_number();
30091                comb_dt = c.dtype_name();
30092            }
30093            bool would_apply = is_int_like && comb_has_col &&
30094                               !comb_hasnans && comb_allwhole;
30095            std::cout << tag << " flat=" << flat
30096                      << " src_effective=" << (src_effective.empty() ? "<empty>" : src_effective)
30097                      << " is_int_like=" << is_int_like
30098                      << " comb_dt=" << comb_dt
30099                      << " comb_hasnans=" << comb_hasnans
30100                      << " comb_allwhole=" << comb_allwhole
argmax (pd_test_1_all.cpp:1323)
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
1327        }
1328
1329        std::cout << " -> tests passed" << std::endl;
1330    }
1331
1332    void pd_test_datetime_array_unique() {
1333        std::cout << "========= DatetimeArray: unique/factorize ======================= ";
argmin (pd_test_1_all.cpp:1316)
1306        if (indices.getElementAt({0}) != 2) {
1307            std::cout << "  [FAIL] : argsort: first should be index 2 (2023-01-01)" << std::endl;
1308            throw std::runtime_error("pd_test_datetime_array_sorting failed: argsort first");
1309        }
1310        if (indices.getElementAt({3}) != 1) {
1311            std::cout << "  [FAIL] : argsort: last should be index 1 (NaT)" << std::endl;
1312            throw std::runtime_error("pd_test_datetime_array_sorting failed: NaT position");
1313        }
1314
1315        // argmin
1316        auto min_idx = arr.argmin();
1317        if (!min_idx.has_value() || min_idx.value() != 2) {
1318            std::cout << "  [FAIL] : argmin should be 2 (2023-01-01)" << std::endl;
1319            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmin");
1320        }
1321
1322        // argmax
1323        auto max_idx = arr.argmax();
1324        if (!max_idx.has_value() || max_idx.value() != 3) {
1325            std::cout << "  [FAIL] : argmax should be 3 (2023-12-31)" << std::endl;
1326            throw std::runtime_error("pd_test_datetime_array_sorting failed: argmax");
autocorr (pd_test_3_all.cpp:11904)
11894    auto result2 = s.asof("2020-01-05");
11895    if (!result2.has_value() || std::abs(*result2 - 4.0) > 0.001) {
11896        std::cout << "  [FAIL] : asof after all dates incorrect" << std::endl;
11897        throw std::runtime_error("pd_test_series_asof failed");
11898    }
11899
11900    std::cout << " -> tests passed" << std::endl;
11901}
11902
11903// ============================================================================
11904// Test 3: autocorr()
11905// ============================================================================
11906void pd_test_series_autocorr() {
11907    std::cout << "========= Series.autocorr() ========================";
11908
11909    // Create a simple series with known autocorrelation
11910    std::vector<double> vals = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0};
11911    pandas::Series<double> s(vals, "test");
11912
11913    auto result = s.autocorr(1);
11914    if (!result.has_value()) {
axes (pd_test_1_all.cpp:16602)
16593        // =====================================================================
16594        // Axes Tests
16595        // =====================================================================
16596
16597        void pd_test_ndframe_axes() {
16598            std::cout << "========= axes =================================================" << std::endl;
16599
16600            pandas::Series<double> s({1.0, 2.0, 3.0});
16601
16602            auto axes = s.axes();
16603
16604            bool passed = axes.size() == 1;
16605            if (!passed) {
16606                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axes count" << std::endl;
16607                throw std::runtime_error("pd_test_ndframe_axes failed: axes count");
16608            }
16609
16610            passed = axes[0]->size() == 3;
16611            if (!passed) {
16612                std::cout << "  [FAIL] : in pd_test_ndframe_axes() : axis size" << std::endl;
between (pd_test_1_all.cpp:19258)
19248                throw std::runtime_error("pd_test_series_comparison failed: greater than");
19249            }
19250
19251            auto eq2 = s == 2;
19252            passed = eq2[1] == true && eq2[0] == false;
19253            if (!passed) {
19254                std::cout << "  [FAIL] : in pd_test_series_comparison() : equals failed" << std::endl;
19255                throw std::runtime_error("pd_test_series_comparison failed: equals");
19256            }
19257
19258            auto between_result = s.between(2, 4);
19259            passed = between_result[0] == false && between_result[1] == true && between_result[3] == true;
19260            if (!passed) {
19261                std::cout << "  [FAIL] : in pd_test_series_comparison() : between failed" << std::endl;
19262                throw std::runtime_error("pd_test_series_comparison failed: between");
19263            }
19264
19265            std::cout << " -> tests passed" << std::endl;
19266        }
19267
19268        void pd_test_series_transformations() {
canonical_dtype_name (pd_test_5_all.cpp:86367)
86357void case_1_series_complex_dtype_name(int& local_fail) {
86358    std::cout << "-- case_1_series_complex_dtype_name\n";
86359    pandas::Series<cdouble> s({cdouble(1.0, 2.0), cdouble(3.0, 4.0)});
86360    const std::string dt = s.dtype_name();
86361    pandas_tests::check(dt == "complex128",
86362          "case_1_series_complex_dtype_name.dtype_is_complex128_got_" + dt, local_fail);
86363}
86364
86365void case_2_series_canonical_dtype_name(int& local_fail) {
86366    std::cout << "-- case_2_series_canonical_dtype_name\n";
86367    const std::string canon = pandas::Series<cdouble>::canonical_dtype_name();
86368    pandas_tests::check(canon == "complex128",
86369          "case_2_canonical_dtype_name_is_complex128_got_" + canon, local_fail);
86370}
86371
86372void case_3_empty_series_complex_dtype(int& local_fail) {
86373    std::cout << "-- case_3_empty_series_complex_dtype\n";
86374    pandas::Series<cdouble> s(std::vector<cdouble>{});
86375    const std::string dt = s.dtype_name();
86376    pandas_tests::check(dt == "complex128",
86377          "case_3_empty_series_complex_dtype.dtype_is_complex128_got_" + dt, local_fail);
case_when (pd_test_3_all.cpp:9129)
9119    }
9120
9121    std::cout << " -> tests passed" << std::endl;
9122}
9123
9124// ============================================================================
9125// Category 35: Plan 08 - Series case_when and rdivmod
9126// ============================================================================
9127
9128void pd_test_3_all_series_case_when() {
9129    std::cout << "========= Series.case_when() ======================";
9130
9131    std::vector<double> vals = {1.0, 2.0, 3.0, 4.0, 5.0};
9132    pandas::Series<double> s(vals, "test");
9133
9134    // Create conditions
9135    numpy::NDArray<numpy::bool_> cond1({5});
9136    numpy::NDArray<numpy::bool_> cond2({5});
9137    for (size_t i = 0; i < 5; ++i) {
9138        cond1.setElementAt({i}, numpy::bool_(vals[i] < 2.0));   // val < 2
9139        cond2.setElementAt({i}, numpy::bool_(vals[i] > 4.0));   // val > 4
cat (pd_test_3_all.cpp:16259)
16249    }
16250
16251    std::cout << " -> tests passed" << std::endl;
16252}
16253
16254void pd_test_categorical_fillna_params() {
16255    std::cout << "========= CategoricalArray fillna params =============";
16256
16257    // Create CategoricalArray using vector constructor with optional values
16258    std::vector<std::optional<std::string>> values = {"a", "b", std::nullopt, "a"};
16259    pandas::CategoricalArray cat(values);
16260
16261    // Test fillna with method and limit parameters (should compile and work)
16262    auto result = cat.fillna("b", "", std::nullopt, true);
16263
16264    bool passed = (result.size() == 4);
16265    if (!passed) {
16266        std::cout << "  [FAIL] : in pd_test_categorical_fillna_params() : fillna failed" << std::endl;
16267        throw std::runtime_error("pd_test_categorical_fillna_params failed");
16268    }
cat_ordered (pd_test_2_all.cpp:20373)
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
20382
20383    // Test DatetimeIndex freq propagation
ceil (pd_test_1_all.cpp:4949)
4939                throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4940            }
4941
4942            auto f = a.floor();
4943            passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4944            if (!passed) {
4945                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4946                throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4947            }
4948
4949            auto c = a.ceil();
4950            passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4951            if (!passed) {
4952                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
4953                throw std::runtime_error("pd_test_arithmetic_series_round failed: ceil failed");
4954            }
4955
4956            // Round with decimals
4957            pandas::Series<double> b({1.234, 2.567, 3.891});
4958            auto r2 = b.round(2);
4959            passed = std::abs(r2[0] - 1.23) < 0.001 && std::abs(r2[1] - 2.57) < 0.001;
clear_cache (pd_test_1_all.cpp:19413)
19403            s.mean();
19404            s.min();
19405            s.max();
19406
19407            passed = s.has_cached_values() == true;
19408            if (!passed) {
19409                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not populated" << std::endl;
19410                throw std::runtime_error("pd_test_series_cache failed: cache not populated");
19411            }
19412
19413            s.clear_cache();
19414            passed = s.has_cached_values() == false;
19415            if (!passed) {
19416                std::cout << "  [FAIL] : in pd_test_series_cache() : cache not cleared" << std::endl;
19417                throw std::runtime_error("pd_test_series_cache failed: cache not cleared");
19418            }
19419
19420            std::cout << " -> tests passed" << std::endl;
19421        }
19422
19423        void pd_test_series_string_repr() {
clip (pd_test_1_all.cpp:5099)
5089                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs failed");
5090            }
5091
5092            val = a["A"].get_value_str(2);
5093            passed = std::abs(std::stod(val) - 3.0) < 0.001;
5094            if (!passed) {
5095                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : abs for -3 failed" << std::endl;
5096                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: abs for -3 failed");
5097            }
5098
5099            auto c = df.clip(-2.0, 2.0);
5100            val = c["A"].get_value_str(2);
5101            passed = std::abs(std::stod(val) - (-2.0)) < 0.001;  // -3 clipped to -2
5102            if (!passed) {
5103                std::cout << "  [FAIL] : in pd_test_arithmetic_dataframe_abs_clip() : clip lower failed" << std::endl;
5104                throw std::runtime_error("pd_test_arithmetic_dataframe_abs_clip failed: clip lower failed");
5105            }
5106
5107            val = c["A"].get_value_str(3);
5108            passed = std::abs(std::stod(val) - 2.0) < 0.001;  // 4 clipped to 2
5109            if (!passed) {
clone (pd_test_1_all.cpp:5776)
5767    std::cout << " -> tests passed" << std::endl;
5768}
5769
5770void pd_test_categorical_index_clone() {
5771    std::cout << "========= clone =======================================";
5772
5773    pandas::CategoricalArray arr({"p", "q", "r"});
5774    pandas::CategoricalIndex idx(arr, "original");
5775
5776    std::unique_ptr<pandas::IndexBase> cloned = idx.clone();
5777
5778    bool passed = (cloned != nullptr && cloned->size() == idx.size() &&
5779                   cloned->name() == idx.name());
5780    if (!passed) {
5781        std::cout << "  [FAIL] : in pd_test_categorical_index_clone()" << std::endl;
5782        throw std::runtime_error("pd_test_categorical_index_clone failed");
5783    }
5784
5785    std::cout << " -> tests passed" << std::endl;
5786}
corr (pd_test_1_all.cpp:4655)
4645        }
4646
4647        void pd_test_aggregation_dataframe_corr() {
4648            std::cout << "========= DataFrame corr ========================";
4649
4650            std::map<std::string, std::vector<double>> data;
4651            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
4652            data["B"] = {2.0, 4.0, 6.0, 8.0, 10.0};  // Perfect correlation
4653            pandas::DataFrame df(data);
4654
4655            auto corr_df = df.corr();
4656
4657            // Check dimensions
4658            bool passed = corr_df.nrows() == 2 && corr_df.ncols() == 2;
4659            if (!passed) {
4660                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_corr() : corr should be 2x2" << std::endl;
4661                throw std::runtime_error("pd_test_aggregation_dataframe_corr failed: corr should be 2x2");
4662            }
4663
4664            // Diagonal should be 1.0
4665            std::string aa = corr_df["A"].get_value_str(0);
cov (pd_test_1_all.cpp:4690)
4680            std::cout << " -> tests passed" << std::endl;
4681        }
4682
4683        void pd_test_aggregation_dataframe_cov() {
4684            std::cout << "========= DataFrame cov =========================";
4685
4686            std::map<std::string, std::vector<double>> data;
4687            data["A"] = {1.0, 2.0, 3.0};
4688            pandas::DataFrame df(data);
4689
4690            auto cov_df = df.cov();
4691
4692            // Check dimensions
4693            bool passed = cov_df.nrows() == 1 && cov_df.ncols() == 1;
4694            if (!passed) {
4695                std::cout << "  [FAIL] : in pd_test_aggregation_dataframe_cov() : cov should be 1x1" << std::endl;
4696                throw std::runtime_error("pd_test_aggregation_dataframe_cov failed: cov should be 1x1");
4697            }
4698
4699            // Var(A) = 1.0 with ddof=1
4700            std::string aa = cov_df["A"].get_value_str(0);
datetime_array (pd_test_extension_array.cpp:255)
245    }
246    pandas::Series<numpy::datetime64> s(dt_vec);
247    s.set_datetime_array(ea);
248    // to_frame() must propagate the EA into the resulting DataFrame's column.
249    pandas::DataFrame df = s.to_frame(std::optional<std::string>{"d"});
250    check(df.has_column("d"), "to_frame: column 'd' present");
251    auto& col = df["d"];
252    auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
253    check(col_dt != nullptr, "to_frame: column dynamic_casts to Series<datetime64>");
254    if (col_dt) {
255        const auto& da_opt = col_dt->datetime_array();
256        check(da_opt.has_value() && *da_opt,
257              "to_frame: column has datetime_array EA populated");
258        if (da_opt.has_value() && *da_opt) {
259            // Pointer equality: the SAME shared_ptr should propagate (no clone).
260            check((*da_opt).get() == ea.get(),
261                  "to_frame: datetime_array shared_ptr identity preserved");
262        }
263    }
264    return g_errors - errors_before;
265}
datetime_ns_to_string (pd_test_3_all.cpp:27760)
27750    }
27751
27752    if (fail == 0) std::cout << "    OK" << std::endl;
27753}
27754
27755void pd_test_astype_datetime_to_string() {
27756    std::cout << "  -- pd_test_astype_datetime_to_string --" << std::endl;
27757    int fail = 0;
27758
27759    double ns_val = 1577836800000000000.0;
27760    std::string formatted = pandas::Series<numpy::float64>::datetime_ns_to_string(ns_val);
27761    if (formatted != "2020-01-01") {
27762        std::cout << "    FAIL: expected '2020-01-01', got '" << formatted << "'" << std::endl;
27763        fail++;
27764    }
27765
27766    double ns_val2 = 1577836800000000000.0 + 12*3600000000000.0 + 30*60000000000.0 + 45*1000000000.0;
27767    std::string formatted2 = pandas::Series<numpy::float64>::datetime_ns_to_string(ns_val2);
27768    if (formatted2 != "2020-01-01 12:30:45") {
27769        std::cout << "    FAIL: expected '2020-01-01 12:30:45', got '" << formatted2 << "'" << std::endl;
27770        fail++;
dt (pd_test_3_all.cpp:18239)
18229    if (offset.freqstr() != "D") {
18230        std::cout << "  [FAIL] : Day freqstr() failed" << std::endl;
18231        throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232    }
18233    if (offset.name() != "Day") {
18234        std::cout << "  [FAIL] : Day name() failed" << std::endl;
18235        throw std::runtime_error("pd_test_day_offset: name() failed");
18236    }
18237
18238    // Test apply
18239    numpy::datetime64 dt("2020-01-15");
18240    auto result = offset.apply(dt);
18241    std::tm tm = result.toTm();
18242    if (tm.tm_mday != 20) {
18243        std::cout << "  [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244        throw std::runtime_error("pd_test_day_offset: apply() failed");
18245    }
18246
18247    std::cout << " -> tests passed" << std::endl;
18248}
dt (pd_test_3_all.cpp:18239)
18229    if (offset.freqstr() != "D") {
18230        std::cout << "  [FAIL] : Day freqstr() failed" << std::endl;
18231        throw std::runtime_error("pd_test_day_offset: freqstr() failed");
18232    }
18233    if (offset.name() != "Day") {
18234        std::cout << "  [FAIL] : Day name() failed" << std::endl;
18235        throw std::runtime_error("pd_test_day_offset: name() failed");
18236    }
18237
18238    // Test apply
18239    numpy::datetime64 dt("2020-01-15");
18240    auto result = offset.apply(dt);
18241    std::tm tm = result.toTm();
18242    if (tm.tm_mday != 20) {
18243        std::cout << "  [FAIL] : Day apply() failed, got day " << tm.tm_mday << std::endl;
18244        throw std::runtime_error("pd_test_day_offset: apply() failed");
18245    }
18246
18247    std::cout << " -> tests passed" << std::endl;
18248}
dtype_name (pd_test_1_all.cpp:10104)
10094}
10095
10096void pd_test_extension_index_array_constructor() {
10097    std::cout << "========= array constructor =========================";
10098
10099    pandas::CategoricalArray arr({"apple", "banana", "apple", "cherry"});
10100    pandas::CategoricalIndex idx(arr, "fruits");
10101
10102    bool passed = (idx.size() == 4 && !idx.empty() &&
10103                   idx.name().has_value() && *idx.name() == "fruits" &&
10104                   idx.dtype_name() == "category");
10105    if (!passed) {
10106        std::cout << "  [FAIL] : in pd_test_extension_index_array_constructor() : array constructor check failed" << std::endl;
10107        throw std::runtime_error("pd_test_extension_index_array_constructor failed");
10108    }
10109
10110    std::cout << " -> tests passed" << std::endl;
10111}
10112
10113void pd_test_extension_index_copy_constructor() {
10114    std::cout << "========= copy constructor =========================";
dtype_name_full (pd_test_5_all.cpp:26384)
26374    pandas::DataFrame df;
26375    df.add_column<std::string>("group", {"A", "A", "B"});
26376    df.add_column<bool>("flag", {true, false, true});
26377    // Promote the column's dtype override to the PandasPython-origin sub-type.
26378    df.set_column_dtype("flag", "object:bool");
26379
26380    // Pre-check: dtype_name strips the colon, dtype_name_full keeps it.
26381    pandas_tests::check(df["flag"].dtype_name() == "object",
26382          "b21.pre: df[flag].dtype_name()==object (got '" +
26383          df["flag"].dtype_name() + "')", local_fail);
26384    pandas_tests::check(df["flag"].dtype_name_full() == "object:bool",
26385          "b21.pre: df[flag].dtype_name_full()==object:bool (got '" +
26386          df["flag"].dtype_name_full() + "')", local_fail);
26387
26388    auto gg = df.groupby("group").get_group("A");
26389
26390    // FIX VERIFIED: Option 2 via iloc_rows + take_indices preserves the
26391    // dtype_override ("object:bool"); dtype_name() strips the colon and
26392    // returns "object".
26393    std::string gg_dt = gg["flag"].dtype_name();
26394    std::string gg_dt_full = gg["flag"].dtype_name_full();
dtype_override (pd_test_5_all.cpp:26391)
26381    pandas_tests::check(df["flag"].dtype_name() == "object",
26382          "b21.pre: df[flag].dtype_name()==object (got '" +
26383          df["flag"].dtype_name() + "')", local_fail);
26384    pandas_tests::check(df["flag"].dtype_name_full() == "object:bool",
26385          "b21.pre: df[flag].dtype_name_full()==object:bool (got '" +
26386          df["flag"].dtype_name_full() + "')", local_fail);
26387
26388    auto gg = df.groupby("group").get_group("A");
26389
26390    // FIX VERIFIED: Option 2 via iloc_rows + take_indices preserves the
26391    // dtype_override ("object:bool"); dtype_name() strips the colon and
26392    // returns "object".
26393    std::string gg_dt = gg["flag"].dtype_name();
26394    std::string gg_dt_full = gg["flag"].dtype_name_full();
26395    pandas_tests::check(gg_dt == "object",
26396          "b21.gg[flag].dtype_name()==object (FIX VERIFIED; got '" +
26397          gg_dt + "')", local_fail);
26398    pandas_tests::check(gg_dt_full == "object:bool",
26399          "b21.gg[flag].dtype_name_full()==object:bool (FIX VERIFIED; got '" +
26400          gg_dt_full + "')", local_fail);
26401}
empty (pd_test_1_all.cpp:941)
931#include "../pandas/pd_config.h"
932
933namespace dataframe_tests {
934
935namespace dataframe_tests_config {
936
937    void pd_test_config_version() {
938        std::cout << "========= df_config: version info ======================= ";
939        const char* version = pandas::DataFrameInfo::version();
940        if (version == nullptr || std::string(version).empty()) {
941            std::cout << "[FAIL] : in pd_test_config_version() : version is null or empty" << std::endl;
942            throw std::runtime_error("pd_test_config_version failed: version is null or empty");
943        }
944        std::cout << "-> tests passed" << std::endl;
945    }
946
947    void pd_test_config_na_repr() {
948        std::cout << "========= df_config: NA representation ======================= ";
949        const char* na_repr = pandas::DataFrameConfig::get_na_repr();
950        if (na_repr == nullptr) {
factorize (pd_test_1_all.cpp:1353)
1343        // unique
1344        auto uniq = arr.unique();
1345        // Should have: NaT, 2023-01-01, 2023-06-15 (3 unique values)
1346        if (uniq.size() != 3) {
1347            std::cout << "  [FAIL] : unique size should be 3, got " << uniq.size() << std::endl;
1348            throw std::runtime_error("pd_test_datetime_array_unique failed: size");
1349        }
1350
1351        // factorize
1352        auto [codes, uniques] = arr.factorize();
1353        // Codes for NaT should be -1
1354        if (codes.getElementAt({3}) != -1) {
1355            std::cout << "  [FAIL] : factorize: NaT code should be -1" << std::endl;
1356            throw std::runtime_error("pd_test_datetime_array_unique failed: NaT code");
1357        }
1358        // Same values should have same codes
1359        if (codes.getElementAt({0}) != codes.getElementAt({2})) {
1360            std::cout << "  [FAIL] : factorize: 2023-01-01 values should have same code" << std::endl;
1361            throw std::runtime_error("pd_test_datetime_array_unique failed: same code");
1362        }
file (pd_test_2_all.cpp:3463)
3453                {"C", {100, 200, 300, 400, 500}}
3454            };
3455
3456            pandas::DataFrame df(data);
3457
3458            // Export to Excel
3459            std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460            df.to_excel(filepath);
3461
3462            // Verify file was created
3463            std::ifstream file(filepath, std::ios::binary);
3464            if (!file.good()) {
3465                std::cout << "  [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466                throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467            }
3468
3469            // Check file size is reasonable (valid XLSX should be > 1KB)
3470            file.seekg(0, std::ios::end);
3471            auto size = file.tellg();
3472            if (size < 1000) {
3473                std::cout << "  [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
file (pd_test_2_all.cpp:3463)
3453                {"C", {100, 200, 300, 400, 500}}
3454            };
3455
3456            pandas::DataFrame df(data);
3457
3458            // Export to Excel
3459            std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460            df.to_excel(filepath);
3461
3462            // Verify file was created
3463            std::ifstream file(filepath, std::ios::binary);
3464            if (!file.good()) {
3465                std::cout << "  [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466                throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467            }
3468
3469            // Check file size is reasonable (valid XLSX should be > 1KB)
3470            file.seekg(0, std::ios::end);
3471            auto size = file.tellg();
3472            if (size < 1000) {
3473                std::cout << "  [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
file (pd_test_2_all.cpp:3463)
3453                {"C", {100, 200, 300, 400, 500}}
3454            };
3455
3456            pandas::DataFrame df(data);
3457
3458            // Export to Excel
3459            std::string filepath = "temp/pd_test_excel_basic.xlsx";
3460            df.to_excel(filepath);
3461
3462            // Verify file was created
3463            std::ifstream file(filepath, std::ios::binary);
3464            if (!file.good()) {
3465                std::cout << "  [FAIL] : in pd_test_excel_basic() : File was not created" << std::endl;
3466                throw std::runtime_error("pd_test_excel_basic failed: file not created");
3467            }
3468
3469            // Check file size is reasonable (valid XLSX should be > 1KB)
3470            file.seekg(0, std::ios::end);
3471            auto size = file.tellg();
3472            if (size < 1000) {
3473                std::cout << "  [FAIL] : in pd_test_excel_basic() : File size too small: " << size << std::endl;
filter (pd_test_3_all.cpp:2805)
2795        threw = true;
2796    }
2797    if (!threw) {
2798        throw std::runtime_error("bool_() should throw for multi-element DataFrame");
2799    }
2800
2801    std::cout << " -> tests passed" << std::endl;
2802}
2803
2804void pd_test_3_all_df_filter() {
2805    std::cout << "========= DataFrame.filter() =============================";
2806
2807    std::map<std::string, std::vector<double>> data = {
2808        {"col_a", {1.0, 2.0, 3.0}},
2809        {"col_b", {4.0, 5.0, 6.0}},
2810        {"other", {7.0, 8.0, 9.0}}
2811    };
2812    pandas::DataFrame df(data);
2813
2814    // Test filter by items
2815    pandas::DataFrame filtered_items = df.filter({"col_a", "col_b"});
filter_by_bool_mask (pd_test_5_all.cpp:92595)
92585    std::vector<std::vector<double>> data = {{1.0, 2.0}};
92586    std::vector<std::string> cols = {"x", "y"};
92587    return pandas::DataFrame::from_records(data, cols);
92588}
92589
92590void case_1_filter_mask_f64() {
92591    std::cout << "-- case_1_filter_mask_f64()\n";
92592    int local_fail = 0;
92593    auto s = make_f64_series_5();
92594    auto mask = make_mask_TFTFT();
92595    auto result = s.filter_by_bool_mask(mask);
92596    pandas_tests::check(result.size() == 3,
92597        "f_test_25_filter_mask_f64_393211.size_eq_3", local_fail);
92598    pandas_tests::check(result.iat(0) == 1.0,
92599        "f_test_25_filter_mask_f64_393211.iat0_eq_1", local_fail);
92600    pandas_tests::check(result.iat(1) == 3.0,
92601        "f_test_25_filter_mask_f64_393211.iat1_eq_3", local_fail);
92602    pandas_tests::check(result.iat(2) == 5.0,
92603        "f_test_25_filter_mask_f64_393211.iat2_eq_5", local_fail);
92604}
filter_by_bool_series (pd_test_5_all.cpp:92669)
92659    pandas_tests::check(threw,
92660        "f_test_25_filter_mask_length_mismatch_throws_604411.value_error",
92661        local_fail);
92662}
92663
92664void case_6_filter_bool_series_happy() {
92665    std::cout << "-- case_6_filter_bool_series_happy()\n";
92666    int local_fail = 0;
92667    auto s = make_f64_series_5();
92668    auto cond = make_bool_series_TFTFT();
92669    auto result = s.filter_by_bool_series(cond);
92670    pandas_tests::check(result.size() == 3,
92671        "f_test_25_filter_bool_series_happy_490201.size_eq_3", local_fail);
92672    pandas_tests::check(result.iat(0) == 1.0,
92673        "f_test_25_filter_bool_series_happy_490201.iat0_eq_1", local_fail);
92674    pandas_tests::check(result.iat(1) == 3.0,
92675        "f_test_25_filter_bool_series_happy_490201.iat1_eq_3", local_fail);
92676}
92677
92678void case_7_filter_bool_series_mismatch_throws() {
92679    std::cout << "-- case_7_filter_bool_series_mismatch_throws()\n";
floor (pd_test_1_all.cpp:4942)
4932            pandas::Series<double> a({1.4, 2.5, 3.6, -1.4, -2.5});
4933
4934            auto r = a.round();
4935            bool passed = std::abs(r[0] - 1.0) < 0.001 && std::abs(r[2] - 4.0) < 0.001;
4936            if (!passed) {
4937                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : round failed" << std::endl;
4938                throw std::runtime_error("pd_test_arithmetic_series_round failed: round failed");
4939            }
4940
4941            auto f = a.floor();
4942            passed = std::abs(f[0] - 1.0) < 0.001 && std::abs(f[2] - 3.0) < 0.001 && std::abs(f[3] - (-2.0)) < 0.001;
4943            if (!passed) {
4944                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : floor failed" << std::endl;
4945                throw std::runtime_error("pd_test_arithmetic_series_round failed: floor failed");
4946            }
4947
4948            auto c = a.ceil();
4949            passed = std::abs(c[0] - 2.0) < 0.001 && std::abs(c[2] - 4.0) < 0.001 && std::abs(c[3] - (-1.0)) < 0.001;
4950            if (!passed) {
4951                std::cout << "  [FAIL] : in pd_test_arithmetic_series_round() : ceil failed" << std::endl;
func (pd_test_3_all.cpp:13837)
13827// ============================================================================
13828// Read Stubs Tests (verify they throw correctly)
13829// ============================================================================
13830
13831void pd_test_top_level_read_stubs() {
13832    std::cout << "========= read_* stubs ================================";
13833
13834    // Test that read functions throw as expected (they are stubs)
13835    auto test_throws = [](const std::string& name, auto func) {
13836        try {
13837            func();
13838            std::cout << "  [FAIL] : " << name << " should throw" << std::endl;
13839            return false;
13840        } catch (const std::exception&) {
13841            return true;
13842        }
13843    };
13844
13845    bool all_passed = true;
13846    all_passed &= test_throws("read_clipboard", []() { pandas::read_clipboard(); });
13847    all_passed &= test_throws("read_excel", []() { pandas::read_excel("test.xlsx"); });
has_cached_values (pd_test_1_all.cpp:19395)
19385            }
19386
19387            std::cout << " -> tests passed" << std::endl;
19388        }
19389
19390        void pd_test_series_cache() {
19391            std::cout << "========= cache management =========================================";
19392
19393            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
19394
19395            bool passed = s.has_cached_values() == false;
19396            if (!passed) {
19397                std::cout << "  [FAIL] : in pd_test_series_cache() : initial cache not empty" << std::endl;
19398                throw std::runtime_error("pd_test_series_cache failed: initial cache not empty");
19399            }
19400
19401            // Trigger cache
19402            s.sum();
19403            s.mean();
19404            s.min();
19405            s.max();
has_cat_categories (pd_test_2_all.cpp:20372)
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
20378}
20379
20380void pd_test_getitem_dispatch_index_propagation() {
20381    std::cout << "pd_test_getitem_dispatch_index_propagation" << std::endl;
has_mask (pd_test_3_all.cpp:27708)
27698    auto* bool_s = dynamic_cast<pandas::Series<numpy::bool_>*>(result.get());
27699    if (!bool_s) {
27700        std::cout << "    FAIL: expected Series<bool_>" << std::endl;
27701        fail++;
27702    } else {
27703        if (bool_s->dtype_name() != "boolean") {
27704            std::cout << "    FAIL: dtype should be boolean, got " << bool_s->dtype_name() << std::endl;
27705            fail++;
27706        }
27707        if (!bool_s->has_mask()) {
27708            std::cout << "    FAIL: should have mask for NA" << std::endl;
27709            fail++;
27710        } else {
27711            if (!bool_s->mask_at(2)) {
27712                std::cout << "    FAIL: position 2 should be masked (NA)" << std::endl;
27713                fail++;
27714            }
27715        }
27716    }
has_multiindex (pd_test_1_all.cpp:27019)
27009            std::map<std::string, std::vector<std::string>> data = {
27010                {"A", {"a", "a", "b", "b"}},
27011                {"B", {"x", "x", "y", "y"}}
27012            };
27013            pandas::DataFrame df(data);
27014
27015            auto result = df.value_counts();
27016            auto& counts = std::get<pandas::Series<numpy::int64>>(result);
27017
27018            if (!counts.has_multiindex()) {
27019                std::cout << "  [FAIL] : expected MultiIndex" << std::endl;
27020                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: no multiindex");
27021            }
27022
27023            const auto& midx = counts.multiindex();
27024
27025            // Should have 2 levels
27026            if (midx.nlevels() != 2) {
27027                std::cout << "  [FAIL] : expected 2 levels, got " << midx.nlevels() << std::endl;
27028                throw std::runtime_error("pd_test_value_counts_multiindex_levels failed: wrong nlevels");
hasnans (pd_test_1_all.cpp:5363)
5353void pd_test_categorical_index_from_codes() {
5354    std::cout << "========= from_codes =================================";
5355
5356    std::vector<numpy::int32> codes = {0, 1, 0, 2, -1};  // -1 = NA
5357    std::vector<std::string> categories = {"low", "medium", "high"};
5358
5359    pandas::CategoricalIndex idx = pandas::CategoricalIndex::from_codes(codes, categories, true, "level");
5360
5361    bool passed = (idx.size() == 5 && idx.num_categories() == 3 &&
5362                   idx.ordered() && idx.name().has_value() && *idx.name() == "level" &&
5363                   idx.hasnans());  // has NA from code -1
5364    if (!passed) {
5365        std::cout << "  [FAIL] : in pd_test_categorical_index_from_codes()" << std::endl;
5366        throw std::runtime_error("pd_test_categorical_index_from_codes failed");
5367    }
5368
5369    std::cout << " -> tests passed" << std::endl;
5370}
5371
5372void pd_test_categorical_index_simple_new() {
5373    std::cout << "========= _simple_new =================================";
index (pd_test_1_all.cpp:6680)
6670        void pd_test_dataframe_index_ops() {
6671            std::cout << "========= index operations =================";
6672
6673            // Test set_axis (rows)
6674            {
6675                std::map<std::string, std::vector<int>> data;
6676                data["A"] = {1, 2, 3};
6677                pandas::DataFrame df(data);
6678
6679                auto renamed = df.set_axis({"x", "y", "z"}, 0);
6680                std::string idx0 = renamed.index().get_value_str(0);
6681                if (idx0 != "x") {
6682                    std::cout << "  [FAIL] : in pd_test_dataframe_index_ops() : set_axis first label should be 'x'" << std::endl;
6683                    throw std::runtime_error("pd_test_dataframe_index_ops failed: set_axis");
6684                }
6685            }
6686
6687            // Test set_axis (columns)
6688            {
6689                std::map<std::string, std::vector<int>> data;
6690                data["A"] = {1, 2};
index_mut (pd_test_5_all.cpp:40329)
40319    pandas_tests::check(r.string_na_sentinel_disabled() == true,
40320                        "case4.string_na_sentinel_disabled_propagates", local_fail);
40321
40322    std::cout << "  source flag=" << s.string_na_sentinel_disabled()
40323              << " result flag=" << r.string_na_sentinel_disabled() << "\n";
40324}
40325
40326void case_5_index_name_propagates(int& local_fail) {
40327    std::cout << "----- case_5_index_name_propagates -----\n";
40328    auto s = make_series_3<std::int64_t>({10, 20, 30});
40329    s.index_mut().set_name(std::optional<std::string>("idx_name"));
40330
40331    auto r = s.reindex({"0", "1", "2"});
40332
40333    auto src_name = s.index().name();
40334    auto res_name = r.index().name();
40335    pandas_tests::check(res_name.has_value(),
40336                        "case5.index_name_present_after_reindex", local_fail);
40337    pandas_tests::check(res_name.has_value() && *res_name == "idx_name",
40338                        "case5.index_name_value_is_idx_name", local_fail);
info (pd_test_1_all.cpp:7122)
7112            }
7113            if (!empty_params_error) {
7114                std::cout << "  [FAIL] : select_dtypes empty params should throw" << std::endl;
7115                throw std::runtime_error("pd_test_dataframe_select_dtypes failed: empty params error");
7116            }
7117
7118            std::cout << " -> tests passed" << std::endl;
7119        }
7120
7121        // =====================================================================
7122        // Test: info() method
7123        // =====================================================================
7124        void pd_test_dataframe_info() {
7125            std::cout << "========= info ========================";
7126
7127            // Test basic info() with stringstream
7128            std::map<std::string, std::vector<int>> data = {
7129                {"A", {1, 2, 3, 4, 5}},
7130                {"B", {10, 20, 30, 40, 50}},
7131                {"C", {100, 200, 300, 400, 500}}
7132            };
item (pd_test_3_all.cpp:3712)
3702    // Test is_interval (always false for base Index)
3703    if (int_idx.is_interval()) {
3704        throw std::runtime_error("base Index should not be interval");
3705    }
3706
3707    std::cout << " -> tests passed" << std::endl;
3708}
3709
3710void pd_test_3_all_index_item() {
3711    std::cout << "========= Index.item() =============================";
3712
3713    pandas::Index<numpy::int64> idx1({42});
3714    numpy::int64 val = idx1.item();
3715
3716    if (val != 42) {
3717        throw std::runtime_error("item() should return 42");
3718    }
3719
3720    // Test error for size != 1
3721    pandas::Index<numpy::int64> idx2({1, 2, 3});
memcpy (pd_test_5_all.cpp:33658)
33648    }
33649    std::cout << " -> tests passed" << std::endl;
33650}
33651
33652// --- f_test_formatter_to_chars_9.cpp ---
33653
33654namespace f_test_formatter_to_chars_9_ns {
33655
33656static double bits_to_double(std::uint64_t bits) {
33657    double v;
33658    std::memcpy(&v, &bits, sizeof(v));
33659    return v;
33660}
33661
33662static int format_current(char* buf, std::size_t bufsz, int digits, double v) {
33663    if (digits < 0 || !std::isfinite(v)) {
33664        return std::snprintf(buf, bufsz, "%.*f", digits, v);
33665    }
33666    long double scale = 1.0L;
33667    for (int k = 0; k < digits; ++k) scale *= 10.0L;
33668    long double scaled = static_cast<long double>(v) * scale;
memory_usage (pd_test_1_all.cpp:27063)
27053        }
27054
27055        std::cout << "====================================== [OK] pd_test_value_counts test suite ========================== " << std::endl;
27056        return 0;
27057    }
27058
27059} // namespace dataframe_tests
27060// ------------------- pd_test_value_counts.cpp (end) -----------------------------
27061
27062// ------------------- pd_test_memory_usage.cpp (start) -----------------------------
27063// Tests for DataFrame.memory_usage() - pandas-compatible memory usage reporting
27064
27065namespace dataframe_tests {
27066    namespace dataframe_tests_memory_usage {
27067
27068        void pd_test_memory_usage_basic() {
27069            std::cout << "========= basic memory_usage =======================";
27070
27071            // Create a simple DataFrame with multiple columns
27072            std::map<std::string, std::vector<double>> data;
27073            data["A"] = {1.0, 2.0, 3.0, 4.0, 5.0};
mixed_tz_array (pd_test_extension_array.cpp:287)
277        dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
278    }
279    pandas::Series<numpy::datetime64> s(dt_vec);
280    s.set_mixed_tz_array(mta);
281    pandas::DataFrame df = s.to_frame(std::optional<std::string>{"m"});
282    check(df.has_column("m"), "to_frame mixed-tz: column 'm' present");
283    auto& col = df["m"];
284    auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
285    check(col_dt != nullptr, "to_frame mixed-tz: column is Series<datetime64>");
286    if (col_dt) {
287        const auto& mta_opt = col_dt->mixed_tz_array();
288        check(mta_opt.has_value() && *mta_opt,
289              "to_frame mixed-tz: column has mixed_tz_array EA populated");
290        if (mta_opt.has_value() && *mta_opt) {
291            check((*mta_opt).get() == mta.get(),
292                  "to_frame mixed-tz: shared_ptr identity preserved");
293        }
294    }
295    return g_errors - errors_before;
296}
name (pd_test_1_all.cpp:295)
285            throw std::runtime_error("pd_test_boolean_array_reductions failed: mean");
286        }
287
288        std::cout << " -> tests passed" << std::endl;
289    }
290
291    void pd_test_boolean_array_dtype() {
292        std::cout << "========= BooleanArray: dtype ======================= ";
293
294        pandas::BooleanArray arr;
295        if (arr.dtype().name() != "boolean") {
296            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype name should be 'boolean'" << std::endl;
297            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype name");
298        }
299
300        if (arr.dtype().kind() != "b") {
301            std::cout << "  [FAIL] : in pd_test_boolean_array_dtype() : dtype kind should be 'b'" << std::endl;
302            throw std::runtime_error("pd_test_boolean_array_dtype failed: dtype kind");
303        }
304
305        std::cout << " -> tests passed" << std::endl;
nbytes (pd_test_1_all.cpp:6214)
6204            }
6205
6206            // Test empty DataFrame
6207            pandas::DataFrame empty_df;
6208            if (!empty_df.empty()) {
6209                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should be empty" << std::endl;
6210                throw std::runtime_error("pd_test_dataframe_properties failed: should be empty");
6211            }
6212
6213            // Test nbytes > 0 for non-empty
6214            if (df.nbytes() == 0) {
6215                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : nbytes should be > 0" << std::endl;
6216                throw std::runtime_error("pd_test_dataframe_properties failed: nbytes should be > 0");
6217            }
6218
6219            // Test columns index
6220            if (df.columns().size() != 3) {
6221                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : columns size != 3" << std::endl;
6222                throw std::runtime_error("pd_test_dataframe_properties failed: columns size != 3");
6223            }
ndim (pd_test_1_all.cpp:6195)
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
6199
6200            // Test empty
6201            if (df.empty()) {
6202                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : should not be empty" << std::endl;
6203                throw std::runtime_error("pd_test_dataframe_properties failed: should not be empty");
6204            }
ravel (pd_test_3_all.cpp:2147)
2137        throw std::runtime_error("memory_usage shallow too small");
2138    }
2139    if (deep < shallow) {
2140        throw std::runtime_error("memory_usage deep should be >= shallow");
2141    }
2142
2143    std::cout << " -> tests passed" << std::endl;
2144}
2145
2146void pd_test_3_all_categorical_ravel_view() {
2147    std::cout << "========= CategoricalArray.ravel()/view() =============";
2148
2149    std::vector<std::optional<std::string>> values = {"a", "b", "c"};
2150    pandas::CategoricalArray arr(values);
2151
2152    auto raveled = arr.ravel();
2153    if (raveled.size() != 3 || !raveled.equals(arr)) {
2154        throw std::runtime_error("ravel failed");
2155    }
2156
2157    auto viewed = arr.view();
repeat (pd_test_3_all.cpp:2166)
2156    auto viewed = arr.view();
2157    if (viewed.size() != 3 || !viewed.equals(arr)) {
2158        throw std::runtime_error("view failed");
2159    }
2160
2161    std::cout << " -> tests passed" << std::endl;
2162}
2163
2164void pd_test_3_all_categorical_repeat() {
2165    std::cout << "========= CategoricalArray.repeat() ===================";
2166
2167    std::vector<std::optional<std::string>> values = {"a", "b"};
2168    pandas::CategoricalArray arr(values);
2169
2170    auto result = arr.repeat(3);
2171    if (result.size() != 6 || *result[0] != "a" || *result[2] != "a" ||
2172        *result[3] != "b" || *result[5] != "b") {
2173        throw std::runtime_error("repeat scalar failed");
2174    }
repr (pd_test_1_all.cpp:10906)
10896    std::cout << " -> tests passed" << std::endl;
10897}
10898
10899void pd_test_extension_index_repr() {
10900    std::cout << "========= repr =========================";
10901
10902    pandas::CategoricalArray arr({"a", "b", "c"});
10903    // Use ExtensionIndex<CategoricalArray> directly to test base class repr
10904    pandas::ExtensionIndex<pandas::CategoricalArray> idx(arr, "test");
10905
10906    std::string repr_str = idx.repr();
10907
10908    bool passed = (!repr_str.empty() && repr_str.find("ExtensionIndex") != std::string::npos);
10909    if (!passed) {
10910        std::cout << "  [FAIL] : in pd_test_extension_index_repr() : repr check failed" << std::endl;
10911        throw std::runtime_error("pd_test_extension_index_repr failed");
10912    }
10913
10914    std::cout << " -> tests passed" << std::endl;
10915}
resolve_multiindex_level (pd_test_3_all.cpp:23441)
23431    if (gb.group_keys_order().size() != 2)
23432        throw std::runtime_error("expected 2 groups");
23433    auto sums = gb.sum();
23434    if (sums[0] != 40.0 || sums[1] != 20.0)
23435        throw std::runtime_error("sum mismatch");
23436
23437    std::cout << " -> tests passed" << std::endl;
23438}
23439
23440void pd_test_groupby_resolve_level() {
23441    std::cout << "========= resolve_multiindex_level() ==================";
23442
23443    pandas::Series<numpy::float64> s({1.0, 2.0});
23444    std::vector<std::vector<std::string>> level_values = {{"a", "b"}, {"x", "y"}};
23445    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23446    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23447    s.set_multiindex(mi);
23448
23449    if (s.resolve_multiindex_level("first") != 0 || s.resolve_multiindex_level("second") != 1)
23450        throw std::runtime_error("string level resolution failed");
23451    if (s.resolve_multiindex_level(0) != 0 || s.resolve_multiindex_level(-1) != 1)
resolve_multiindex_level (pd_test_3_all.cpp:23441)
23431    if (gb.group_keys_order().size() != 2)
23432        throw std::runtime_error("expected 2 groups");
23433    auto sums = gb.sum();
23434    if (sums[0] != 40.0 || sums[1] != 20.0)
23435        throw std::runtime_error("sum mismatch");
23436
23437    std::cout << " -> tests passed" << std::endl;
23438}
23439
23440void pd_test_groupby_resolve_level() {
23441    std::cout << "========= resolve_multiindex_level() ==================";
23442
23443    pandas::Series<numpy::float64> s({1.0, 2.0});
23444    std::vector<std::vector<std::string>> level_values = {{"a", "b"}, {"x", "y"}};
23445    std::vector<std::optional<std::string>> level_names = {"first", "second"};
23446    auto mi = pandas::MultiIndex::from_arrays<std::string>(level_values, level_names);
23447    s.set_multiindex(mi);
23448
23449    if (s.resolve_multiindex_level("first") != 0 || s.resolve_multiindex_level("second") != 1)
23450        throw std::runtime_error("string level resolution failed");
23451    if (s.resolve_multiindex_level(0) != 0 || s.resolve_multiindex_level(-1) != 1)
result (pd_test_1_all.cpp:15406)
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
result (pd_test_1_all.cpp:15406)
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
round (pd_test_1_all.cpp:1688)
1678    void pd_test_floating_array_rounding() {
1679        std::cout << "========= FloatingArray: rounding ======================= ";
1680
1681        pandas::FloatingArray<double> arr({
1682            std::optional<double>(1.234),
1683            std::optional<double>(2.567),
1684            std::nullopt
1685        });
1686
1687        auto rounded = arr.round(2);
1688        if (std::abs(rounded[0].value() - 1.23) > 0.001 ||
1689            std::abs(rounded[1].value() - 2.57) > 0.001) {
1690            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round(2)" << std::endl;
1691            throw std::runtime_error("pd_test_floating_array_rounding failed: round(2)");
1692        }
1693
1694        if (!rounded.is_na(2)) {
1695            std::cout << "  [FAIL] : in pd_test_floating_array_rounding() : round should preserve NA" << std::endl;
1696            throw std::runtime_error("pd_test_floating_array_rounding failed: NA preservation");
1697        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
s (pd_test_1_all.cpp:4524)
4514#include <vector>
4515#include "../pandas/pd_dataframe.h"
4516#include "../pandas/pd_series.h"
4517
4518namespace dataframe_tests {
4519    namespace dataframe_tests_aggregation {
4520
4521        void pd_test_aggregation_series_sem() {
4522            std::cout << "========= Series sem ============================";
4523
4524            pandas::Series<double> s({1.0, 2.0, 3.0, 4.0, 5.0});
4525            auto sem_val = s.sem();
4526            // std(ddof=1) = sqrt(2.5), sem = sqrt(2.5)/sqrt(5) ≈ 0.707
4527            bool passed = sem_val.has_value() && std::abs(*sem_val - 0.707) < 0.01;
4528            if (!passed) {
4529                std::cout << "  [FAIL] : in pd_test_aggregation_series_sem() : sem value incorrect" << std::endl;
4530                throw std::runtime_error("pd_test_aggregation_series_sem failed: sem value incorrect");
4531            }
4532
4533            std::cout << " -> tests passed" << std::endl;
4534        }
set_cat_categories (pd_test_2_all.cpp:20366)
20356    check(sub.columns().get_value_str(0) == "col", "dup col0 name");
20357    check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
set_cat_ordered (pd_test_2_all.cpp:20367)
20357    check(sub.columns().get_value_str(1) == "col", "dup col1 name");
20358}
20359
20360void pd_test_getitem_dispatch_category_metadata() {
20361    std::cout << "pd_test_getitem_dispatch_category_metadata" << std::endl;
20362    pandas::DataFrame df;
20363    std::vector<std::string> svals = {"a", "b", "a", "c"};
20364    auto cs = std::make_unique<pandas::Series<std::string>>(svals, "cat");
20365    cs->set_dtype_override("category");
20366    cs->set_cat_categories({"a", "b", "c"});
20367    cs->set_cat_ordered(true);
20368    df.insert(0, "cat", std::move(cs), true);
20369
20370    auto s = df.get_column_as_string_series("cat");
20371    check(s.dtype_name() == "category", "cat dtype");
20372    check(s.has_cat_categories(), "cat has_categories");
20373    check(s.cat_ordered() == true, "cat ordered");
20374    auto cats = s.get_cat_categories();
20375    check(cats.size() == 3, "cat categories size");
20376    std::set<std::string> cat_set(cats.begin(), cats.end());
20377    check(cat_set.count("a") && cat_set.count("b") && cat_set.count("c"), "cat categories content");
set_datetime_array (pd_test_extension_array.cpp:247)
237        pandas::Timestamp(1577836800000000000LL),
238        pandas::Timestamp(1609459200000000000LL),
239    };
240    auto ea = std::make_shared<pandas::DatetimeArray>(
241        pandas::DatetimeArray::from_timestamps(ts, /*uniform_tz=*/""));
242    std::vector<numpy::datetime64> dt_vec;
243    for (const auto& t : ts) {
244        dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
245    }
246    pandas::Series<numpy::datetime64> s(dt_vec);
247    s.set_datetime_array(ea);
248    // to_frame() must propagate the EA into the resulting DataFrame's column.
249    pandas::DataFrame df = s.to_frame(std::optional<std::string>{"d"});
250    check(df.has_column("d"), "to_frame: column 'd' present");
251    auto& col = df["d"];
252    auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
253    check(col_dt != nullptr, "to_frame: column dynamic_casts to Series<datetime64>");
254    if (col_dt) {
255        const auto& da_opt = col_dt->datetime_array();
256        check(da_opt.has_value() && *da_opt,
257              "to_frame: column has datetime_array EA populated");
set_dtype_override (pd_test_2_all.cpp:20225)
20215    std::vector<numpy::float64> vals = {1.0, 2.0, 3.0};
20216    df.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals, "A"), true);
20217
20218    auto t = df.classify_column_access("A");
20219    check(t == pandas::DataFrame::ColumnAccessType::NumericColumn, "float64 -> NumericColumn");
20220
20221    // int64 column
20222    pandas::DataFrame df2;
20223    std::vector<numpy::int64> ivals = {10, 20, 30};
20224    auto iseries = std::make_unique<pandas::Series<numpy::int64>>(ivals, "B");
20225    iseries->set_dtype_override("int64");
20226    df2.insert(0, "B", std::move(iseries), true);
20227    auto t2 = df2.classify_column_access("B");
20228    check(t2 == pandas::DataFrame::ColumnAccessType::NumericColumn, "int64 -> NumericColumn");
20229}
20230
20231void pd_test_getitem_dispatch_classify_bool() {
20232    std::cout << "pd_test_getitem_dispatch_classify_bool" << std::endl;
20233    pandas::DataFrame df;
20234    std::vector<numpy::bool_> bvals = {true, false, true};
20235    df.insert(0, "flag", std::make_unique<pandas::Series<numpy::bool_>>(bvals, "flag"), true);
set_freq (pd_test_1_all.cpp:8254)
8244void pd_test_datetime_mixin_set_freq() {
8245    std::cout << "========= set_freq ====================================";
8246
8247    std::vector<std::optional<numpy::datetime64>> values = {
8248        numpy::datetime64(0LL, numpy::DateTimeUnit::Nanosecond)
8249    };
8250    pandas::DatetimeArray arr(values);
8251    pandas::DatetimeMixinIndex idx(arr);
8252
8253    idx.set_freq("D");
8254    auto f = idx.freq();
8255
8256    bool passed = (f.has_value() && *f == "D");
8257    if (!passed) {
8258        std::cout << "  [FAIL] : in pd_test_datetime_mixin_set_freq()" << std::endl;
8259        throw std::runtime_error("pd_test_datetime_mixin_set_freq failed");
8260    }
8261
8262    std::cout << " -> tests passed" << std::endl;
8263}
set_mask (pd_test_3_all.cpp:25879)
25869    std::cout << "  PASSED" << std::endl;
25870}
25871
25872void pd_test_cat_constructor_helpers_mask() {
25873    std::cout << "========= cat_constructor_helpers mask ==================" << std::endl;
25874    ::pandas::Series<std::string> s(std::vector<std::string>{"x", "y", "z"});
25875    ::numpy::NDArray<::numpy::bool_> mask(std::vector<size_t>{3});
25876    mask.setElementAt({0}, ::numpy::bool_(false));
25877    mask.setElementAt({1}, ::numpy::bool_(true));
25878    mask.setElementAt({2}, ::numpy::bool_(false));
25879    s.set_mask(mask);
25880    auto result = ::pandas::series_to_optional_string_vector(s);
25881    if (result.size() != 3) throw std::runtime_error("Expected size 3");
25882    if (!result[0].has_value()) throw std::runtime_error("Expected value at index 0");
25883    if (result[1].has_value()) throw std::runtime_error("Expected nullopt at index 1 (masked)");
25884    if (!result[2].has_value()) throw std::runtime_error("Expected value at index 2");
25885    std::cout << "  PASSED" << std::endl;
25886}
25887
25888void pd_test_cat_constructor_helpers_empty() {
25889    std::cout << "========= cat_constructor_helpers empty =================" << std::endl;
set_mixed_tz_array (pd_test_extension_array.cpp:280)
270        pandas::Timestamp(1577836800000000000LL, "UTC"),
271        pandas::Timestamp(1609459200000000000LL, "US/Eastern"),
272    };
273    auto mta = std::make_shared<pandas::MixedTzDatetimeArray>(
274        pandas::MixedTzDatetimeArray::from_timestamps(ts));
275    std::vector<numpy::datetime64> dt_vec;
276    for (const auto& t : ts) {
277        dt_vec.push_back(numpy::datetime64(t->value(), numpy::DateTimeUnit::Nanosecond));
278    }
279    pandas::Series<numpy::datetime64> s(dt_vec);
280    s.set_mixed_tz_array(mta);
281    pandas::DataFrame df = s.to_frame(std::optional<std::string>{"m"});
282    check(df.has_column("m"), "to_frame mixed-tz: column 'm' present");
283    auto& col = df["m"];
284    auto* col_dt = dynamic_cast<pandas::Series<numpy::datetime64>*>(&col);
285    check(col_dt != nullptr, "to_frame mixed-tz: column is Series<datetime64>");
286    if (col_dt) {
287        const auto& mta_opt = col_dt->mixed_tz_array();
288        check(mta_opt.has_value() && *mta_opt,
289              "to_frame mixed-tz: column has mixed_tz_array EA populated");
290        if (mta_opt.has_value() && *mta_opt) {
set_multiindex (pd_test_2_all.cpp:20409)
20399        check(s.get_freq().value() == "D", "freq value D");
20400    }
20401
20402    // Test MultiIndex propagation
20403    pandas::DataFrame df2;
20404    std::vector<numpy::float64> vals2 = {10.0, 20.0};
20405    df2.insert(0, "A", std::make_unique<pandas::Series<numpy::float64>>(vals2, "A"), true);
20406    std::vector<std::vector<std::string>> arrays = {{"x", "y"}, {"1", "2"}};
20407    std::vector<std::optional<std::string>> names = {std::string("first"), std::string("second")};
20408    auto mi = pandas::MultiIndex::from_arrays<std::string>(arrays, names);
20409    df2.set_multiindex(mi);
20410
20411    auto s2 = df2.extract_column_as_numeric_series("A");
20412    check(s2.has_multiindex(), "multiindex propagated");
20413}
20414
20415} // namespace dataframe_tests_getitem_dispatch
20416
20417int pd_test_getitem_dispatch_main() {
20418    std::cout << "====================================== pd_test_getitem_dispatch test suite ==========================" << std::endl;
20419    dataframe_tests_getitem_dispatch::g_pass = 0;
set_name (pd_test_1_all.cpp:11798)
11788                throw std::runtime_error("pd_test_index_vector_constructor failed");
11789            }
11790
11791            std::cout << " -> tests passed" << std::endl;
11792        }
11793
11794        void pd_test_index_copy_constructor() {
11795            std::cout << "========= copy constructor ============================";
11796
11797            pandas::Index<numpy::int64> idx1{1, 2, 3};
11798            idx1.set_name("original");
11799
11800            pandas::Index<numpy::int64> idx2(idx1);
11801
11802            bool passed = (idx2.size() == 3);
11803            passed = passed && (idx2.name().value() == "original");
11804            passed = passed && idx2.equals(idx1);
11805
11806            if (!passed) {
11807                std::cout << "  [FAIL] : in pd_test_index_copy_constructor() : copy failed" << std::endl;
11808                throw std::runtime_error("pd_test_index_copy_constructor failed");
shape (pd_test_1_all.cpp:6188)
6178            std::cout << "========= properties =======================";
6179
6180            std::map<std::string, std::vector<numpy::float64>> data;
6181            data["A"] = {1.0, 2.0, 3.0, 4.0};
6182            data["B"] = {5.0, 6.0, 7.0, 8.0};
6183            data["C"] = {9.0, 10.0, 11.0, 12.0};
6184
6185            pandas::DataFrame df(data);
6186
6187            // Test shape
6188            auto shape = df.shape();
6189            if (shape.size() != 2 || shape[0] != 4 || shape[1] != 3) {
6190                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : shape mismatch" << std::endl;
6191                throw std::runtime_error("pd_test_dataframe_properties failed: shape mismatch");
6192            }
6193
6194            // Test ndim
6195            if (df.ndim() != 2) {
6196                std::cout << "  [FAIL] : in pd_test_dataframe_properties() : ndim != 2" << std::endl;
6197                throw std::runtime_error("pd_test_dataframe_properties failed: ndim != 2");
6198            }
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
size (pd_test_1_all.cpp:22)
12#include "../pandas/pd_boolean_array.h"
13
14namespace dataframe_tests {
15
16namespace dataframe_tests_boolean_array {
17    void pd_test_boolean_array_constructors() {
18        std::cout << "========= BooleanArray: constructors ======================= ";
19
20        // Default constructor
21        pandas::BooleanArray arr1;
22        if (arr1.size() != 0) {
23            std::cout << "  [FAIL] : in pd_test_boolean_array_constructors() : default constructor size != 0" << std::endl;
24            throw std::runtime_error("pd_test_boolean_array_constructors failed: default constructor size != 0");
25        }
26
27        // Initializer list constructor
28        pandas::BooleanArray arr2({
29            std::optional<bool>(true),
30            std::optional<bool>(false),
31            std::nullopt,
32            std::optional<bool>(true)
sparse (pd_test_3_all.cpp:20627)
20617#include <cmath>
20618
20619#include "../pandas/pd_series.h"
20620
20621// CRITICAL: No using namespace directives
20622
20623namespace dataframe_tests {
20624namespace dataframe_tests_sparse_accessor {
20625
20626// ============================================================================
20627// Test sparse().density() and sparse().npoints()
20628// ============================================================================
20629
20630void pd_test_sparse_density_npoints() {
20631    std::cout << "========= Series.sparse().density/npoints() =============";
20632
20633    // Create a series with some zeros (sparse values)
20634    pandas::Series<numpy::float64> s({0.0, 1.0, 0.0, 2.0, 0.0, 3.0});
20635
20636    auto sparse = s.sparse(0.0);  // 0.0 is the fill value
str (pd_test_1_all.cpp:7137)
7127            // Test basic info() with stringstream
7128            std::map<std::string, std::vector<int>> data = {
7129                {"A", {1, 2, 3, 4, 5}},
7130                {"B", {10, 20, 30, 40, 50}},
7131                {"C", {100, 200, 300, 400, 500}}
7132            };
7133            pandas::DataFrame df(data);
7134
7135            std::ostringstream oss;
7136            df.info(oss);
7137            std::string output = oss.str();
7138
7139            // Verify key components
7140            if (output.find("<class 'pandas.core.frame.DataFrame'>") == std::string::npos) {
7141                std::cout << "  [FAIL] : info missing class name" << std::endl;
7142                throw std::runtime_error("pd_test_dataframe_info failed: missing class name");
7143            }
7144            if (output.find("RangeIndex:") == std::string::npos) {
7145                std::cout << "  [FAIL] : info missing RangeIndex" << std::endl;
7146                throw std::runtime_error("pd_test_dataframe_info failed: missing RangeIndex");
7147            }
truncate (pd_test_1_all.cpp:20467)
20457            std::vector<std::string> dates = {
20458                "2020-01-01",
20459                "2020-01-02",
20460                "2020-01-03",
20461                "2020-01-04",
20462                "2020-01-05"
20463            };
20464            df.set_index(std::make_unique<pandas::Index<std::string>>(dates));
20465
20466            // Truncate to keep only dates from 2020-01-02 to 2020-01-04
20467            pandas::DataFrame result = df.truncate("2020-01-02", "2020-01-04");
20468
20469            bool passed = (result.nrows() == 3);
20470
20471            if (!passed) {
20472                std::cout << "  [FAIL] : in pd_test_timeseries_truncate() : expected 3 rows, got "
20473                          << result.nrows() << std::endl;
20474                throw std::runtime_error("pd_test_timeseries_truncate failed");
20475            }
20476
20477            std::cout << " -> tests passed" << std::endl;
values (pd_test_1_all.cpp:364)
354        pandas::CategoricalArray arr1;
355        if (arr1.size() != 0) {
356            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
357            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
358        }
359        if (arr1.ordered()) {
360            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
361            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
362        }
363
364        // Constructor from values (infer categories)
365        std::vector<std::optional<std::string>> values = {
366            std::optional<std::string>("a"),
367            std::optional<std::string>("b"),
368            std::optional<std::string>("a"),
369            std::optional<std::string>("c")
370        };
371        pandas::CategoricalArray arr2(values);
372        if (arr2.size() != 4) {
373            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : values constructor size != 4" << std::endl;
374            throw std::runtime_error("pd_test_categorical_array_constructors failed: values constructor size != 4");