CategoricalAccessor#
-
class pandas::CategoricalAccessor#
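C++ class in the `pandas` namespace, declared in `pd_categorical_accessor.h`. An instance is obtained from a `pandas::Series` by calling `cat()` (see the `to_categorical_array` example under Code Examples).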
Example#
#include <pandas/pandas.h>
using namespace pandas;
// Obtain a CategoricalAccessor from a Series via cat()
Series<std::string> s({"a", "b", "c", "a"}, "x");
auto acc = s.cat();
// ... operations ...
Data Manipulation#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_categorical_accessor.h:142 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:157 | |
Arithmetic#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `std::vector<std::string>` | pd_categorical_accessor.h:95 | |
I/O#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `CategoricalArray` | pd_categorical_accessor.h:213 | |
Other Methods#
| Signature | Return Type | Location | Example |
|---|---|---|---|
| | `CategoricalAccessor` | pd_categorical_accessor.h:229 | |
| | `CategoricalAccessor` | pd_categorical_accessor.h:237 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:79 | |
| | `std::vector<int32_t>` | pd_categorical_accessor.h:84 | |
| | `void` | pd_categorical_accessor.h:35 | |
| | `bool` | pd_categorical_accessor.h:89 | |
| | `const ParentType&` | pd_categorical_accessor.h:76 | |
| `explicit CategoricalAccessor(const ParentType& parent)` | (constructor) | pd_categorical_accessor.h:73 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:107 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:119 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:177 | |
| | `CategoricalAccessor` | pd_categorical_accessor.h:230 | |
| | `CategoricalAccessor` | pd_categorical_accessor.h:238 | |
| | `std::vector<std::string>` | pd_categorical_accessor.h:199 | |
Code Examples#
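The following examples are extracted from the test suite. Each heading gives a member name and the test-file location of the excerpt; the numbers at the start of each listing line are the test file's own line numbers, and an excerpt may begin or end in the middle of a function. Most excerpts call the same-named method on `pandas::CategoricalArray`; only the `to_categorical_array` excerpt goes through a `CategoricalAccessor` (obtained via `Series::cat()`). The `result` excerpts exercise `pandas::DatetimeIndexBase` and do not involve categorical types.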
rename_categories (pd_test_1_all.cpp:654)
645 void pd_test_categorical_array_rename_categories() {
646 std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648 std::vector<std::string> cats = {"a", "b"};
649 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
650 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652 // Rename categories
653 std::vector<std::string> new_names = {"alpha", "beta"};
654 pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656 // Check categories are renamed
657 const std::vector<std::string>& result_cats = result.categories();
658 if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659 std::cout << " [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660 throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661 }
662
663 // Values should now be renamed
664 std::optional<std::string> val = result[0];
rename_categories (pd_test_1_all.cpp:654)
645 void pd_test_categorical_array_rename_categories() {
646 std::cout << "========= CategoricalArray: rename_categories ======================= ";
647
648 std::vector<std::string> cats = {"a", "b"};
649 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
650 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
651
652 // Rename categories
653 std::vector<std::string> new_names = {"alpha", "beta"};
654 pandas::CategoricalArray result = arr.rename_categories(new_names);
655
656 // Check categories are renamed
657 const std::vector<std::string>& result_cats = result.categories();
658 if (result_cats[0] != "alpha" || result_cats[1] != "beta") {
659 std::cout << " [FAIL] : in pd_test_categorical_array_rename_categories() : categories not renamed" << std::endl;
660 throw std::runtime_error("pd_test_categorical_array_rename_categories failed: categories not renamed");
661 }
662
663 // Values should now be renamed
664 std::optional<std::string> val = result[0];
add_categories (pd_test_1_all.cpp:555)
545 }
546
547 void pd_test_categorical_array_add_categories() {
548 std::cout << "========= CategoricalArray: add_categories ======================= ";
549
550 std::vector<std::string> cats = {"a", "b"};
551 std::vector<numpy::int32> codes = {0, 1, 0};
552 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
553
554 // Add new categories
555 pandas::CategoricalArray result = arr.add_categories({"c", "d"});
556 if (result.categories().size() != 4) {
557 std::cout << " [FAIL] : in pd_test_categorical_array_add_categories() : new categories size != 4" << std::endl;
558 throw std::runtime_error("pd_test_categorical_array_add_categories failed: new categories size != 4");
559 }
560
561 // Original values should be preserved
562 std::optional<std::string> val = result[0];
563 if (!val.has_value() || *val != "a") {
564 std::cout << " [FAIL] : in pd_test_categorical_array_add_categories() : value not preserved" << std::endl;
565 throw std::runtime_error("pd_test_categorical_array_add_categories failed: value not preserved");
to_categorical_array (pd_test_3_all.cpp:28619)
28609 fail += cgo_check(r3.ordered() == arr.ordered(), "ordered inherited");
28610 if (fail == 0) std::cout << " OK" << std::endl;
28611 if (fail != 0) throw std::runtime_error("pd_test_cat_set_categories_ordered failed");
28612}
28613
28614void pd_test_cat_to_array_bridge() {
28615 std::cout << " -- pd_test_cat_to_array_bridge --" << std::endl;
28616 int fail = 0;
28617 pandas::Series<std::string> s({"a", "b", "c", "a"}, "x");
28618 auto acc = s.cat();
28619 auto arr = acc.to_categorical_array();
28620 fail += cgo_check(arr.size() == 4, "size");
28621 const auto& cats = arr.categories();
28622 fail += cgo_check(cats.size() == 3, "3 categories");
28623 // Now set_categories via accessor delegation should filter
28624 auto newc = acc.set_categories({"a", "b"});
28625 fail += cgo_check(newc.size() == 2, "set_categories delegation returned 2");
28626 if (fail == 0) std::cout << " OK" << std::endl;
28627 if (fail != 0) throw std::runtime_error("pd_test_cat_to_array_bridge failed");
28628}
as_ordered (pd_test_1_all.cpp:791)
781 unordered.min();
782 } catch (const std::exception&) {
783 threw = true;
784 }
785 if (!threw) {
786 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788 }
789
790 // Test as_ordered / as_unordered
791 pandas::CategoricalArray reordered = unordered.as_ordered();
792 if (!reordered.ordered()) {
793 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : as_ordered failed" << std::endl;
794 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: as_ordered failed");
795 }
796
797 std::cout << " -> tests passed" << std::endl;
798 }
799
800 void pd_test_categorical_array_comparisons() {
801 std::cout << "========= CategoricalArray: comparisons ======================= ";
as_unordered (pd_test_1_all.cpp:778)
768 }
769
770 // Test max
771 std::optional<std::string> max_val = arr.max();
772 if (!max_val.has_value() || *max_val != "high") {
773 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : max != 'high'" << std::endl;
774 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: max != 'high'");
775 }
776
777 // Test unordered throws for min/max
778 pandas::CategoricalArray unordered = arr.as_unordered();
779 bool threw = false;
780 try {
781 unordered.min();
782 } catch (const std::exception&) {
783 threw = true;
784 }
785 if (!threw) {
786 std::cout << " [FAIL] : in pd_test_categorical_array_ordered_operations() : unordered min should throw" << std::endl;
787 throw std::runtime_error("pd_test_categorical_array_ordered_operations failed: unordered min should throw");
788 }
categories (pd_test_1_all.cpp:389)
379 std::vector<std::optional<std::string>> vals = {
380 std::optional<std::string>("low"),
381 std::optional<std::string>("high"),
382 std::optional<std::string>("medium")
383 };
384 pandas::CategoricalArray arr3(vals, cats, true); // ordered
385 if (!arr3.ordered()) {
386 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : should be ordered" << std::endl;
387 throw std::runtime_error("pd_test_categorical_array_constructors failed: should be ordered");
388 }
389 if (arr3.categories().size() != 3) {
390 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : categories size != 3" << std::endl;
391 throw std::runtime_error("pd_test_categorical_array_constructors failed: categories size != 3");
392 }
393
394 std::cout << " -> tests passed" << std::endl;
395 }
396
397 void pd_test_categorical_array_from_codes() {
398 std::cout << "========= CategoricalArray: from_codes ======================= ";
codes (pd_test_1_all.cpp:473)
463 std::cout << " -> tests passed" << std::endl;
464 }
465
466 void pd_test_categorical_array_codes_property() {
467 std::cout << "========= CategoricalArray: codes property ======================= ";
468
469 std::vector<std::string> cats = {"x", "y", "z"};
470 std::vector<numpy::int32> codes = {0, 1, 2, 1, 0};
471 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
472
473 numpy::NDArray<numpy::int32> arr_codes = arr.codes();
474 if (arr_codes.getSize() != 5) {
475 std::cout << " [FAIL] : in pd_test_categorical_array_codes_property() : codes size != 5" << std::endl;
476 throw std::runtime_error("pd_test_categorical_array_codes_property failed: codes size != 5");
477 }
478
479 // Check codes match
480 for (size_t i = 0; i < codes.size(); ++i) {
481 if (arr_codes.getElementAt({i}) != codes[i]) {
482 std::cout << " [FAIL] : in pd_test_categorical_array_codes_property() : code mismatch at " << i << std::endl;
483 throw std::runtime_error("pd_test_categorical_array_codes_property failed: code mismatch");
ordered (pd_test_1_all.cpp:358)
349 void pd_test_categorical_array_constructors() {
350 std::cout << "========= CategoricalArray: constructors ======================= ";
351
352 // Default constructor
353 pandas::CategoricalArray arr1;
354 if (arr1.size() != 0) {
355 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default constructor size != 0" << std::endl;
356 throw std::runtime_error("pd_test_categorical_array_constructors failed: default constructor size != 0");
357 }
358 if (arr1.ordered()) {
359 std::cout << " [FAIL] : in pd_test_categorical_array_constructors() : default should be unordered" << std::endl;
360 throw std::runtime_error("pd_test_categorical_array_constructors failed: default should be unordered");
361 }
362
363 // Constructor from values (infer categories)
364 std::vector<std::optional<std::string>> values = {
365 std::optional<std::string>("a"),
366 std::optional<std::string>("b"),
367 std::optional<std::string>("a"),
368 std::optional<std::string>("c")
remove_categories (pd_test_1_all.cpp:591)
581 }
582
583 void pd_test_categorical_array_remove_categories() {
584 std::cout << "========= CategoricalArray: remove_categories ======================= ";
585
586 std::vector<std::string> cats = {"a", "b", "c"};
587 std::vector<numpy::int32> codes = {0, 1, 2, 1}; // a, b, c, b
588 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
589
590 // Remove 'c' - values with 'c' become NA
591 pandas::CategoricalArray result = arr.remove_categories({"c"});
592
593 if (result.categories().size() != 2) {
594 std::cout << " [FAIL] : in pd_test_categorical_array_remove_categories() : categories size != 2" << std::endl;
595 throw std::runtime_error("pd_test_categorical_array_remove_categories failed: categories size != 2");
596 }
597
598 // Element at index 2 should now be NA (was 'c')
599 if (!result.is_na(2)) {
600 std::cout << " [FAIL] : in pd_test_categorical_array_remove_categories() : removed category should be NA" << std::endl;
601 throw std::runtime_error("pd_test_categorical_array_remove_categories failed: removed category should be NA");
remove_unused_categories (pd_test_1_all.cpp:737)
727 std::cout << " -> tests passed" << std::endl;
728 }
729
730 void pd_test_categorical_array_remove_unused_categories() {
731 std::cout << "========= CategoricalArray: remove_unused_categories ======================= ";
732
733 std::vector<std::string> cats = {"a", "b", "c", "d"};
734 std::vector<numpy::int32> codes = {0, 0, 2}; // a, a, c (b and d unused)
735 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
736
737 pandas::CategoricalArray result = arr.remove_unused_categories();
738
739 // Only 'a' and 'c' should remain
740 if (result.categories().size() != 2) {
741 std::cout << " [FAIL] : in pd_test_categorical_array_remove_unused_categories() : categories size != 2" << std::endl;
742 throw std::runtime_error("pd_test_categorical_array_remove_unused_categories failed: categories size != 2");
743 }
744
745 // Values should be preserved
746 std::optional<std::string> val0 = result[0];
747 std::optional<std::string> val2 = result[2];
reorder_categories (pd_test_1_all.cpp:694)
685 void pd_test_categorical_array_reorder_categories() {
686 std::cout << "========= CategoricalArray: reorder_categories ======================= ";
687
688 std::vector<std::string> cats = {"a", "b", "c"};
689 std::vector<numpy::int32> codes = {0, 1, 2}; // a, b, c
690 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
691
692 // Reorder categories
693 std::vector<std::string> new_order = {"c", "b", "a"};
694 pandas::CategoricalArray result = arr.reorder_categories(new_order);
695
696 // Check categories are reordered
697 const std::vector<std::string>& result_cats = result.categories();
698 if (result_cats[0] != "c" || result_cats[1] != "b" || result_cats[2] != "a") {
699 std::cout << " [FAIL] : in pd_test_categorical_array_reorder_categories() : categories not reordered" << std::endl;
700 throw std::runtime_error("pd_test_categorical_array_reorder_categories failed: categories not reordered");
701 }
702
703 // Values should be preserved
704 std::optional<std::string> val0 = result[0];
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
result (pd_test_1_all.cpp:15406)
15396 data.setElementAt({0}, numpy::datetime64(100LL, numpy::DateTimeUnit::Nanosecond));
15397 data.setElementAt({1}, numpy::datetime64(200LL, numpy::DateTimeUnit::Nanosecond));
15398
15399 numpy::NDArray<numpy::bool_> mask(std::vector<size_t>{2});
15400 mask.setElementAt({0}, numpy::bool_(false));
15401 mask.setElementAt({1}, numpy::bool_(false));
15402
15403 pandas::DatetimeArray arr(data, mask);
15404 pandas::DatetimeIndexBase idx(arr, "original");
15405
15406 // Create join result (int64 values)
15407 numpy::NDArray<numpy::int64> join_result(std::vector<size_t>{3});
15408 join_result.setElementAt({0}, numpy::int64(500LL));
15409 join_result.setElementAt({1}, numpy::int64(600LL));
15410 join_result.setElementAt({2}, numpy::int64(700LL));
15411
15412 auto new_idx = idx._from_join_target(join_result);
15413
15414 bool passed = (new_idx.size() == 3 &&
15415 new_idx.name().has_value() && *new_idx.name() == "original");
15416 if (!passed) {
set_categories (pd_test_1_all.cpp:622)
613 void pd_test_categorical_array_set_categories() {
614 std::cout << "========= CategoricalArray: set_categories ======================= ";
615
616 std::vector<std::string> cats = {"a", "b"};
617 std::vector<numpy::int32> codes = {0, 1, 0}; // a, b, a
618 pandas::CategoricalArray arr = pandas::CategoricalArray::from_codes(codes, cats);
619
620 // Set new categories (values not in new categories become NA)
621 std::vector<std::string> new_cats = {"a", "c"}; // 'b' removed, 'c' added
622 pandas::CategoricalArray result = arr.set_categories(new_cats);
623
624 if (result.categories().size() != 2) {
625 std::cout << " [FAIL] : in pd_test_categorical_array_set_categories() : categories size != 2" << std::endl;
626 throw std::runtime_error("pd_test_categorical_array_set_categories failed: categories size != 2");
627 }
628
629 // Element at index 1 should be NA (was 'b', now not in categories)
630 if (!result.is_na(1)) {
631 std::cout << " [FAIL] : in pd_test_categorical_array_set_categories() : 'b' value should be NA" << std::endl;
632 throw std::runtime_error("pd_test_categorical_array_set_categories failed: 'b' value should be NA");