Parser#

class pandas::Parser#

Query engine class for expression evaluation.

Example#

#include <pandas/pandas.h>
using namespace pandas;

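// Construct a Parser from a token sequence. The only constructor listed on
// this page is `explicit Parser(std::vector<Token> tokens)`.
std::vector<Token> tokens;  // ... fill with the tokens to parse ...
Parser obj(std::move(tokens));
// ... operations ...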

Type Checking#

Signature

Return Type

Location

Example

static bool is_comparison_op(TokenType tok_type)

bool

pd_query.h:422

Other Methods#

Signature

Return Type

Location

Example

void advance()

void

pd_query.h:407

Token& current()

Token&

pd_query.h:398

View

void expect(TokenType expected_type, const std::string& error_msg)

void

pd_query.h:415

expect(TokenType::RPAREN, "Expected ')' after expression")

pd_query.h:477

expect(TokenType::RBRACKET, "Expected ']' to close list literal")

pd_query.h:509

bool match(TokenType expected_type)

bool

pd_query.h:411

View

ListLiteral parse_list_literal()

ListLiteral

pd_query.h:490

Token& peek(size_t ahead = 1)

Token&

pd_query.h:402

View

explicit Parser(std::vector<Token> tokens) : tokens_(std::move(tokens))

(constructor — no return type)

pd_query.h:705

Code Examples#

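The following examples are extracted from the test suite. Note that they were matched by method name only: in the snippets shown, the `current` and `match` hits are words inside comments, and the `peek` hit is a call to `peek()` on an object named `in` (apparently an input stream), so none of them demonstrates a call to a Parser method.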

current (pd_test_4_all.cpp:1140)
1130                      const std::string& actual) {
1131    int _f = 0;
1132    pandas_tests::check_str_ws(label, expected, actual, _f);
1133    if (_f > 0) throw std::runtime_error(label + ": str mismatch");
1134}
1135
1136// ----------------------------------------------------------------------------
1137// Case 1 — dtype.int32_df_nsmallest
1138// ----------------------------------------------------------------------------
1139void dtype_int32_df_nsmallest() {
1140    // Strategy B: synthesize the current (buggy) post-nsmallest state.
1141    // Column A is double because int32 is silently promoted inside
1142    // pandas::DataFrame::nsmallest today. Column B (from range(10)) stays
1143    // int64. Row index labels "2","6","4" are the original positions of the
1144    // 3 smallest A values, ties broken by first-occurrence.
1145    pandas::DataFrame df;
1146    df.add_column<double>("A", std::vector<double>{1.0, 2.0, 3.0});
1147    df.add_column<int64_t>("B", std::vector<int64_t>{2, 6, 4});
1148    df.set_index(std::make_unique<pandas::Index<std::string>>(
1149        std::vector<std::string>{"2", "6", "4"}));
1150    apply_default_display(df);
match (pd_test_2_all.cpp:1467)
1457        void pd_test_between_time_overnight() {
1458            std::cout << "========= DataFrame between_time: overnight range ======";
1459
1460            // Test overnight range (e.g., 23:00 to 01:00)
1461            std::map<std::string, std::vector<double>> data = {
1462                {"A", {1.0, 2.0, 3.0, 4.0, 5.0}}
1463            };
1464            pandas::DataFrame df(data);
1465
1466            std::vector<std::string> datetime_index = {
1467                "2018-04-09 00:30:00",  // Should match (before 01:00)
1468                "2018-04-09 12:00:00",  // Should NOT match
1469                "2018-04-09 22:00:00",  // Should NOT match
1470                "2018-04-09 23:30:00",  // Should match (after 23:00)
1471                "2018-04-10 00:00:00"   // Should match (at midnight, before 01:00)
1472            };
1473            df.set_index(std::make_unique<pandas::Index<std::string>>(datetime_index));
1474
1475            // Overnight range: 23:00 to 01:00
1476            auto result = df.between_time("23:00:00", "01:00:00");
peek (pd_test_5_all.cpp:123633)
123623    if (!in.good()) return false;
123624    std::string field;
123625    bool in_quotes = false;
123626    char ch;
123627    bool any = false;
123628    while (in.get(ch)) {
123629        any = true;
123630        if (in_quotes) {
123631            if (ch == '"') {
123632                // Lookahead: doubled quote = literal quote.
123633                if (in.peek() == '"') { in.get(ch); field.push_back('"'); }
123634                else                  { in_quotes = false; }
123635            } else if (ch == '\r') {
123636                // Strip CR even inside quotes: the oracle CSV uses \r\n for
123637                // newlines inside quoted multiline cells, but `format_*` only
123638                // emits \n. Normalise here so byte-equality holds.
123639            } else {
123640                field.push_back(ch);
123641            }
123642        } else {
123643            if (ch == '"') {