CategoricalAccessor#

class pandas::CategoricalAccessor#

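Accessor class in the pandas namespace for categorical data. It is constructed from a reference to a parent object and exposes that object's categories, codes, and ordered flag, together with methods for adding, removing, renaming, reordering, and setting categories, and for converting to a CategoricalArray.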

Example#

#include <pandas/pandas.h>
using namespace pandas;

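// CategoricalAccessor has no default constructor (its only documented
// constructor is explicit and takes the parent object), so obtain one from
// a Series via cat(), as the test suite does.
Series<std::string> s({"a", "b", "c", "a"}, "x");
auto acc = s.cat();
auto arr = acc.to_categorical_array();
// ... operations ...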

Data Manipulation#

Signature

Return Type

Location

Example

std::vector<std::string> rename_categories(const std::vector<std::string>& new_categories) const

std::vector<std::string>

pd_categorical_accessor.h:142

View

std::vector<std::string> rename_categories(const std::map<std::string, std::string>& mapping) const

std::vector<std::string>

pd_categorical_accessor.h:157

View

Adding Categories#

Signature

Return Type

Location

Example

std::vector<std::string> add_categories(const std::vector<std::string>& new_categories) const

std::vector<std::string>

pd_categorical_accessor.h:95

View

I/O#

Signature

Return Type

Location

Example

CategoricalArray to_categorical_array() const

CategoricalArray

pd_categorical_accessor.h:213

View

Other Methods#

Signature

Return Type

Location

Example

CategoricalAccessor as_ordered() const

CategoricalAccessor

pd_categorical_accessor.h:229

View

CategoricalAccessor as_unordered() const

CategoricalAccessor

pd_categorical_accessor.h:237

View

std::vector<std::string> categories() const

std::vector<std::string>

pd_categorical_accessor.h:79

View

std::vector<int32_t> codes() const

std::vector<int32_t>

pd_categorical_accessor.h:84

View

void compute_if_needed() const

void

pd_categorical_accessor.h:35

bool ordered() const

bool

pd_categorical_accessor.h:89

View

const ParentType& parent() const

const ParentType&

pd_categorical_accessor.h:76

explicit CategoricalAccessor(const ParentType& parent)

(constructor — no return type)

pd_categorical_accessor.h:73

std::vector<std::string> remove_categories(const std::vector<std::string>& removals) const

std::vector<std::string>

pd_categorical_accessor.h:107

View

std::vector<std::string> remove_unused_categories() const

std::vector<std::string>

pd_categorical_accessor.h:119

View

std::vector<std::string> reorder_categories( const std::vector<std::string>& new_categories, std::optional<bool> ordered = std::nullopt) const

std::vector<std::string>

pd_categorical_accessor.h:177

View

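CategoricalAccessor result(\*this) (not a member function: this appears to be a local variable declared inside as_ordered(), which is listed at pd_categorical_accessor.h:229)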

CategoricalAccessor

pd_categorical_accessor.h:230

View

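CategoricalAccessor result(\*this) (not a member function: this appears to be a local variable declared inside as_unordered(), which is listed at pd_categorical_accessor.h:237)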

CategoricalAccessor

pd_categorical_accessor.h:238

View

std::vector<std::string> set_categories( const std::vector<std::string>& new_categories, std::optional<bool> ordered = std::nullopt, bool rename = false) const

std::vector<std::string>

pd_categorical_accessor.h:199

View

Code Examples#

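The following examples are extracted from the test suite. Note that most excerpts exercise the same-named methods on pandas::CategoricalArray rather than on CategoricalAccessor; only the to_categorical_array excerpt goes through the accessor itself, obtained via Series::cat().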

rename_categories (pd_test_1_all.cpp:655)
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
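rename_categories (pd_test_1_all.cpp:655) (same excerpt as above, repeated for the std::map overload; the test shown passes a std::vector<std::string>)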
645    void pd_test_categorical_array_rename_categories() {
646        std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648        std::vector<std::string> cats = {"a", "b"};
649        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
650        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652        // Rename categories
653        std::vector<std::string> new_names = {"alpha", "beta"};
654        pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656        // Check categories are renamed
657        const std::vector<std::string>& result_cats = result.categories();
658        if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659            std::cout << "  [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660            throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661        }
662
663        // Values should now be renamed
664        std::optional<std::string> val = result[0];
add_categories (pd_test_1_all.cpp:555)
545    }
546
547    void pd_test_categorical_array_add_categories() {
548        std::cout << "========= CategoricalArray: add_categories ======================= ";
549
550        std::vector<std::string> cats = {"a", "b"};
551        std::vector<numpy::int32> codes = {0, 1, 0};
552        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
553
554        // Add new categories
555        pandas::CategoricalArray result = arr.add_categories({"c", "d"});
556        if (result.categories().size() != 4) {
557            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : new categories size != 4" << std::endl;
558            throw std::runtime_error("pd_test_categorical_array_add_categories failed: new categories size != 4");
559        }
560
561        // Original values should be preserved
562        std::optional<std::string> val = result[0];
563        if (!val.has_value() || *val != "a") {
564            std::cout << "  [FAIL] : in pd_test_categorical_array_add_categories() : value not preserved" << std::endl;
565            throw std::runtime_error("pd_test_categorical_array_add_categories failed: value not preserved");
to_categorical_array (pd_test_3_all.cpp:28619)
28609    fail += cgo_check(r3.ordered() == arr.ordered(), "ordered inherited");
28610    if (fail == 0) std::cout << "    OK" << std::endl;
28611    if (fail != 0) throw std::runtime_error("pd_test_cat_set_categories_ordered failed");
28612}
28613
28614void pd_test_cat_to_array_bridge() {
28615    std::cout << "  -- pd_test_cat_to_array_bridge --" << std::endl;
28616    int fail = 0;
28617    pandas::Series<std::string> s({"a", "b", "c", "a"}, "x");
28618    auto acc = s.cat();
28619    auto arr = acc.to_categorical_array();
28620    fail += cgo_check(arr.size() == 4, "size");
28621    const auto& cats = arr.categories();
28622    fail += cgo_check(cats.size() == 3, "3 categories");
28623    // Now set_categories via accessor delegation should filter
28624    auto newc = acc.set_categories({"a", "b"});
28625    fail += cgo_check(newc.size() == 2, "set_categories delegation returned 2");
28626    if (fail == 0) std::cout << "    OK" << std::endl;
28627    if (fail != 0) throw std::runtime_error("pd_test_cat_to_array_bridge failed");
28628}
as_ordered (pd_test_1_all.cpp:791)
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
789
790        // Test as_ordered / as_unordered
791        pandas::CategoricalArray reordered = unordered.as_ordered();
792        if (!reordered.ordered()) {
793            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : as_ordered failed" << std::endl;
794            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: as_ordered failed");
795        }
796
797        std::cout << " -> tests passed" << std::endl;
798    }
799
800    void pd_test_categorical_array_comparisons() {
801        std::cout << "========= CategoricalArray: comparisons ======================= ";
as_unordered (pd_test_1_all.cpp:778)
768        }
769
770        // Test max
771        std::optional<std::string> max_val = arr.max();
772        if (!max_val.has_value() || *max_val != "high") {
773            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775        }
776
777        // Test unordered throws for min/max
778        pandas::CategoricalArray unordered = arr.as_unordered();
779        bool threw = false;
780        try {
781            unordered.min();
782        } catch (const std::exception&) {
783            threw = true;
784        }
785        if (!threw) {
786            std::cout << "  [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787            throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788        }
categories (pd_test_1_all.cpp:389)
379        std::vector<std::optional<std::string>> vals = {
380            std::optional<std::string>("low"),
381            std::optional<std::string>("high"),
382            std::optional<std::string>("medium")
383        };
384        pandas::CategoricalArray arr3(vals, cats, true);  // ordered
385        if (!arr3.ordered()) {
386            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : should be ordered" << std::endl;
387            throw std::runtime_error("pd_test_categorical_array_constructors failed: should be ordered");
388        }
389        if (arr3.categories().size() != 3) {
390            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : categories size != 3" << std::endl;
391            throw std::runtime_error("pd_test_categorical_array_constructors failed: categories size != 3");
392        }
393
394        std::cout << " -> tests passed" << std::endl;
395    }
396
397    void pd_test_categorical_array_from_codes() {
398        std::cout << "========= CategoricalArray: from_codes ======================= ";
codes (pd_test_1_all.cpp:473)
463        std::cout << " -> tests passed" << std::endl;
464    }
465
466    void pd_test_categorical_array_codes_property() {
467        std::cout << "========= CategoricalArray: codes property ======================= ";
468
469        std::vector<std::string> cats = {"x", "y", "z"};
470        std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473        numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474        if (arr_codes.getSize() != 5) {
475            std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476            throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477        }
478
479        // Check codes match
480        for (size_t i = 0; i < codes.size(); ++i) {
481            if (arr_codes.getElementAt({i}) != codes[i]) {
482                std::cout << "  [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483                throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
ordered (pd_test_1_all.cpp:359)
349    void pd_test_categorical_array_constructors() {
350        std::cout << "========= CategoricalArray: constructors ======================= ";
351
352        // Default constructor
353        pandas::CategoricalArray arr1;
354        if (arr1.size() != 0) {
355            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
356            throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
357        }
358        if (arr1.ordered()) {
359            std::cout << "  [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
360            throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
361        }
362
363        // Constructor from values (infer categories)
364        std::vector<std::optional<std::string>> values = {
365            std::optional<std::string>("a"),
366            std::optional<std::string>("b"),
367            std::optional<std::string>("a"),
368            std::optional<std::string>("c")
remove_categories (pd_test_1_all.cpp:591)
581    }
582
583    void pd_test_categorical_array_remove_categories() {
584        std::cout << "========= CategoricalArray: remove_categories ======================= ";
585
586        std::vector<std::string> cats = {"a", "b", "c"};
587        std::vector<numpy::int32> codes = {0, 1, 2, 1};  // a, b, c, b
588        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
589
590        // Remove 'c' - values with 'c' become NA
591        pandas::CategoricalArray result = arr.remove_categories({"c"});
592
593        if (result.categories().size() != 2) {
594            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : categories size != 2" << std::endl;
595            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: categories size != 2");
596        }
597
598        // Element at index 2 should now be NA (was 'c')
599        if (!result.is_na(2)) {
600            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_categories() : removed category should be NA" << std::endl;
601            throw std::runtime_error("pd_test_categorical_array_remove_categories failed: removed category should be NA");
remove_unused_categories (pd_test_1_all.cpp:737)
727        std::cout << " -> tests passed" << std::endl;
728    }
729
730    void pd_test_categorical_array_remove_unused_categories() {
731        std::cout << "========= CategoricalArray: remove_unused_categories ======================= ";
732
733        std::vector<std::string> cats = {"a", "b", "c", "d"};
734        std::vector<numpy::int32> codes = {0, 0, 2};  // a, a, c (b and d unused)
735        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
736
737        pandas::CategoricalArray result = arr.remove_unused_categories();
738
739        // Only 'a' and 'c' should remain
740        if (result.categories().size() != 2) {
741            std::cout << "  [FAIL] : in pd_test_categorical_array_remove_unused_categories() : categories size != 2" << std::endl;
742            throw std::runtime_error("pd_test_categorical_array_remove_unused_categories failed: categories size != 2");
743        }
744
745        // Values should be preserved
746        std::optional<std::string> val0 = result[0];
747        std::optional<std::string> val2 = result[2];
reorder_categories (pd_test_1_all.cpp:695)
685    void pd_test_categorical_array_reorder_categories() {
686        std::cout << "========= CategoricalArray: reorder_categories ======================= ";
687
688        std::vector<std::string> cats = {"a", "b", "c"};
689        std::vector<numpy::int32> codes = {0, 1, 2};  // a, b, c
690        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
691
692        // Reorder categories
693        std::vector<std::string> new_order = {"c", "b", "a"};
694        pandas::CategoricalArray result = arr.reorder_categories(new_order);
695
696        // Check categories are reordered
697        const std::vector<std::string>& result_cats = result.categories();
698        if (result_cats[0] != "c" || result_cats[1] != "b" || result_cats[2] != "a") {
699            std::cout << "  [FAIL] : in pd_test_categorical_array_reorder_categories() : categories not reordered" << std::endl;
700            throw std::runtime_error("pd_test_categorical_array_reorder_categories failed: categories not reordered");
701        }
702
703        // Values should be preserved
704        std::optional<std::string> val0 = result[0];
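result (pd_test_1_all.cpp:15406) (note: this excerpt is a DatetimeIndexBase test that merely contains the word "result"; it does not exercise CategoricalAccessor)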
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
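result (pd_test_1_all.cpp:15406) (note: this excerpt is a DatetimeIndexBase test that merely contains the word "result"; it does not exercise CategoricalAccessor)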
15396    data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397    data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399    numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400    mask.setElementAt({0}, numpy::bool_(false));
15401    mask.setElementAt({1}, numpy::bool_(false));
15402
15403    pandas::DatetimeArray arr(data, mask);
15404    pandas::DatetimeIndexBase idx(arr, "original");
15405
15406    // Create join result (int64 values)
15407    numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408    join_result.setElementAt({0}, numpy::int64(500LL));
15409    join_result.setElementAt({1}, numpy::int64(600LL));
15410    join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412    auto new_idx = idx._from_join_target(join_result);
15413
15414    bool passed = (new_idx.size() == 3 &&
15415                   new_idx.name().has_value() && *new_idx.name() == "original");
15416    if (!passed) {
set_categories (pd_test_1_all.cpp:623)
613    void pd_test_categorical_array_set_categories() {
614        std::cout << "========= CategoricalArray: set_categories ======================= ";
615
616        std::vector<std::string> cats = {"a", "b"};
617        std::vector<numpy::int32> codes = {0, 1, 0};  // a, b, a
618        pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
619
620        // Set new categories (values not in new categories become NA)
621        std::vector<std::string> new_cats = {"a", "c"};  // 'b' removed, 'c' added
622        pandas::CategoricalArray result = arr.set_categories(new_cats);
623
624        if (result.categories().size() != 2) {
625            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : categories size != 2" << std::endl;
626            throw std::runtime_error("pd_test_categorical_array_set_categories failed: categories size != 2");
627        }
628
629        // Element at index 1 should be NA (was 'b', now not in categories)
630        if (!result.is_na(1)) {
631            std::cout << "  [FAIL] : in pd_test_categorical_array_set_categories() : 'b' value should be NA" << std::endl;
632            throw std::runtime_error("pd_test_categorical_array_set_categories failed: 'b' value should be NA");