Lexer#

class pandas::Lexer#

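Lexer for the query engine: scans an expression string and splits it into Token objects (identifiers, numbers, quoted strings) for expression evaluation.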

Example#

#include <pandas/pandas.h>
using namespace pandas;

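// Construct a Lexer over the expression text (the documented constructor is
// explicit and takes the input string), then pull tokens one at a time.
Lexer lexer("a > 1");
Token tok = lexer.next_token();
// ... operations ...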

Comparison#

Signature

Return Type

Location

Example

Token next_token()

Token

pd_query.h:203

Other Methods#

Signature

Return Type

Location

Example

void advance()

void

pd_query.h:106

char current() const

char

pd_query.h:97

View

explicit Lexer(std::string input) : input_(std::move(input))

(constructor — no return type)

pd_query.h:201

char peek(size_t ahead = 1) const

char

pd_query.h:101

View

Token read_identifier()

Token

pd_query.h:116

Token read_number()

Token

pd_query.h:152

Token read_string(char quote)

Token

pd_query.h:178

void skip_whitespace()

void

pd_query.h:110

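Token tok(TokenType::NUMBER, num) — note: this appears to be a local variable declaration picked up from the body of read_number() (pd_query.h:152–178), not a member function of Lexer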

Token

pd_query.h:173

Code Examples#

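The following snippets were extracted from the test suite by matching method names. A match may refer to an unrelated identifier with the same name (for example, the word "current" in a comment, or peek() called on an input stream) rather than to a Lexer method.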

current (pd_test_4_all.cpp:1140)
1130                      const std::string& actual) {
1131    int _f = 0;
1132    pandas_tests::check_str_ws(label, expected, actual, _f);
1133    if (_f > 0) throw std::runtime_error(label + ": str mismatch");
1134}
1135
1136// ----------------------------------------------------------------------------
1137// Case 1 — dtype.int32_df_nsmallest
1138// ----------------------------------------------------------------------------
1139void dtype_int32_df_nsmallest() {
1140    // Strategy B: synthesize the current (buggy) post-nsmallest state.
1141    // Column A is double because int32 is silently promoted inside
1142    // pandas::DataFrame::nsmallest today. Column B (from range(10)) stays
1143    // int64. Row index labels "2","6","4" are the original positions of the
1144    // 3 smallest A values, ties broken by first-occurrence.
1145    pandas::DataFrame df;
1146    df.add_column<double>("A", std::vector<double>{1.0, 2.0, 3.0});
1147    df.add_column<int64_t>("B", std::vector<int64_t>{2, 6, 4});
1148    df.set_index(std::make_unique<pandas::Index<std::string>>(
1149        std::vector<std::string>{"2", "6", "4"}));
1150    apply_default_display(df);
peek (pd_test_5_all.cpp:123633)
123623    if (!in.good()) return false;
123624    std::string field;
123625    bool in_quotes = false;
123626    char ch;
123627    bool any = false;
123628    while (in.get(ch)) {
123629        any = true;
123630        if (in_quotes) {
123631            if (ch == '"') {
123632                // Lookahead: doubled quote = literal quote.
123633                if (in.peek() == '"') { in.get(ch); field.push_back('"'); }
123634                else                  { in_quotes = false; }
123635            } else if (ch == '\r') {
123636                // Strip CR even inside quotes: the oracle CSV uses \r\n for
123637                // newlines inside quoted multiline cells, but `format_*` only
123638                // emits \n. Normalise here so byte-equality holds.
123639            } else {
123640                field.push_back(ch);
123641            }
123642        } else {
123643            if (ch == '"') {