Statistics Functions ==================== .. currentmodule:: numpycore Functions for computing statistics on arrays. Example ------- .. code-block:: python import numpycore as np arr = np.array([1, 2, 3, 4, 5]) # Basic statistics print(np.mean(arr)) # 3.0 print(np.std(arr)) # 1.4142... print(np.var(arr)) # 2.0 # With axis parameter mat = np.array([[1, 2], [3, 4]]) print(np.mean(mat, axis=0)) # [2., 3.] print(np.mean(mat, axis=1)) # [1.5, 3.5] Order Statistics ---------------- .. list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - amin(a, axis) - Minimum of array - * - amax(a, axis) - Maximum of array - * - nanmin(a, axis) - Minimum ignoring NaNs - * - nanmax(a, axis) - Maximum ignoring NaNs - * - ptp(a, axis) - Range of values (max - min) - * - percentile(a, q, axis) - q-th percentile - * - nanpercentile(a, q, axis) - q-th percentile ignoring NaNs - * - quantile(a, q, axis) - q-th quantile - * - nanquantile(a, q, axis) - q-th quantile ignoring NaNs - Averages and Variances ---------------------- .. list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - mean(a, axis) - Arithmetic mean - :ref:`View <example-stats-average-mean-0>` * - average(a, axis, weights) - Weighted average - * - median(a, axis) - Median - :ref:`View <example-stats-average-median-1>` * - nanmean(a, axis) - Mean ignoring NaNs - * - nanmedian(a, axis) - Median ignoring NaNs - * - std(a, axis, ddof) - Standard deviation - :ref:`View <example-stats-average-std-2>` * - var(a, axis, ddof) - Variance - :ref:`View <example-stats-average-var-3>` * - nanstd(a, axis, ddof) - Std ignoring NaNs - * - nanvar(a, axis, ddof) - Variance ignoring NaNs - Sums and Products ----------------- .. 
list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - sum(a, axis) - Sum of array elements - :ref:`View <example-stats-sums-sum-4>` * - prod(a, axis) - Product of array elements - * - nansum(a, axis) - Sum ignoring NaNs - * - nanprod(a, axis) - Product ignoring NaNs - * - cumsum(a, axis) - Cumulative sum - * - cumprod(a, axis) - Cumulative product - * - nancumsum(a, axis) - Cumulative sum ignoring NaNs - * - nancumprod(a, axis) - Cumulative product ignoring NaNs - Correlations ------------ .. list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - corrcoef(x, y) - Pearson correlation coefficients - * - cov(m, y) - Covariance matrix - * - correlate(a, v, mode) - Cross-correlation - Histograms ---------- .. list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - histogram(a, bins) - Compute histogram - * - histogram2d(x, y, bins) - Compute 2-D histogram - :ref:`View <example-stats-histogram-histogram2d-5>` * - histogramdd(sample, bins) - Compute N-D histogram - :ref:`View <example-stats-histogram-histogramdd-6>` * - bincount(x, weights) - Count occurrences - * - digitize(x, bins) - Return indices of bins - Counting -------- .. list-table:: :widths: 25 60 15 :header-rows: 1 * - Function - Description - Example * - count_nonzero(a, axis) - Count non-zero elements - * - unique(ar) - Unique elements - Code Examples ------------- The following examples are extracted from the test suite. .. _example-stats-average-mean-0: .. dropdown:: mean (test_bool_stats.py:60) :class-title: example-dropdown .. 
code-block:: python :linenos: :lineno-start: 50 :emphasize-lines: 11 return False # ============================================================================ # GROUP 1: mean() with bool scalars # ============================================================================ def test_mean_all_true(): """Test mean([True, True, True]) = 1.0""" arr = test_bind.np.array([True, True, True], dtype=bool) np_result = test_bind.np.mean(arr) nc_result = test_bind.numpycore.mean(arr) assert_close(np_result, nc_result, "mean([T, T, T])") def test_mean_all_false(): """Test mean([False, False, False]) = 0.0""" arr = test_bind.np.array([False, False, False], dtype=bool) np_result = test_bind.np.mean(arr) nc_result = test_bind.numpycore.mean(arr) assert_close(np_result, nc_result, "mean([F, F, F])") def test_mean_mixed_75(): .. _example-stats-average-median-1: .. dropdown:: median (test_field_accessors.py:234) :class-title: example-dropdown .. code-block:: python :linenos: :lineno-start: 224 :emphasize-lines: 11 def test_field_median(): """Test median on field""" # NumPy np_arr = test_bind.np.zeros(51, dtype=[('score', 'f8')]) np_arr['score'] = test_bind.np.arange(51, dtype='f8') np_median = test_bind.np.median(np_arr['score']) # NumpyCore - use numpycore.median which should work on NumPy arrays nc_arr = test_bind.numpycore.structured_zeros(51, dtype=[('score', 'f8')]) nc_arr['score'] = test_bind.np.arange(51, dtype='f8') nc_median = test_bind.numpycore.median(nc_arr['score']) assert_close(np_median, nc_median, "field_median") def test_field_multiple_stats(): """Test multiple statistics on same field""" arr = test_bind.numpycore.structured_zeros(1000, dtype=[('temperature', 'f8')]) arr['temperature'] = test_bind.np.random.randn(1000) * 5 + 20 temp = arr['temperature'] mean_temp = test_bind.numpycore.mean(temp) .. _example-stats-average-std-2: .. dropdown:: std (test_field_accessors.py:189) :class-title: example-dropdown .. 
code-block:: python :linenos: :lineno-start: 179 :emphasize-lines: 11 def test_field_std(): """Test standard deviation on field""" # NumPy np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')]) np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50 np_std = test_bind.np.std(np_arr['measurement']) # NumpyCore nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('measurement', 'f8')]) nc_arr['measurement'] = np_arr['measurement'] nc_std = test_bind.numpycore.std(nc_arr['measurement']) assert_close(np_std, nc_std, "field_std", tol=1e-9) def test_field_min_max(): """Test min and max on field""" # NumPy np_arr = test_bind.np.zeros(50, dtype=[('value', 'f8')]) np_arr['value'] = test_bind.np.random.randn(50) np_min = test_bind.np.min(np_arr['value']) np_max = test_bind.np.max(np_arr['value']) .. _example-stats-average-var-3: .. dropdown:: var (test_field_accessors.py:175) :class-title: example-dropdown .. code-block:: python :linenos: :lineno-start: 165 :emphasize-lines: 11 def test_field_variance(): """Test variance on field""" # NumPy np_arr = test_bind.np.zeros(100, dtype=[('data', 'f8')]) np_arr['data'] = test_bind.np.random.randn(100) np_var = test_bind.np.var(np_arr['data']) # NumpyCore nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('data', 'f8')]) nc_arr['data'] = np_arr['data'] # Use same data nc_var = test_bind.numpycore.var(nc_arr['data']) assert_close(np_var, nc_var, "field_variance", tol=1e-9) def test_field_std(): """Test standard deviation on field""" # NumPy np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')]) np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50 np_std = test_bind.np.std(np_arr['measurement']) .. _example-stats-sums-sum-4: .. dropdown:: sum (test_bool_stats.py:163) :class-title: example-dropdown .. 
code-block:: python :linenos: :lineno-start: 153 :emphasize-lines: 11 # ============================================================================ # GROUP 4: sum() with bool # ============================================================================ def test_sum_count_trues(): """Test sum([T, F, T, T]) = 3 (count True values)""" arr = test_bind.np.array([True, False, True, True], dtype=bool) np_result = test_bind.np.sum(arr) nc_result = test_bind.numpycore.sum(arr) assert_close(np_result, nc_result, "sum([T, F, T, T])") def test_sum_all_false(): """Test sum([F, F]) = 0""" arr = test_bind.np.array([False, False], dtype=bool) np_result = test_bind.np.sum(arr) nc_result = test_bind.numpycore.sum(arr) assert_close(np_result, nc_result, "sum([F, F])") .. _example-stats-histogram-histogram2d-5: .. dropdown:: histogram2d (test_stats_missing.py:69) :class-title: example-dropdown .. code-block:: python :linenos: :lineno-start: 59 :emphasize-lines: 11 def test_histogram2d_basic(): """Test basic 2D histogram computation""" np_x = test_bind.np.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2]) np_y = test_bind.np.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1]) nc_x = test_bind.numpycore.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2]) nc_y = test_bind.numpycore.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1]) np_hist, np_x_edges, np_y_edges = test_bind.np.histogram2d(np_x, np_y, bins=5) nc_hist, nc_x_edges, nc_y_edges = test_bind.numpycore.histogram2d(nc_x, nc_y, bins=5) assert_arrays_equal(np_hist, nc_hist, "histogram2d_basic_hist") assert_arrays_equal(np_x_edges, nc_x_edges, "histogram2d_basic_x_edges") assert_arrays_equal(np_y_edges, nc_y_edges, "histogram2d_basic_y_edges") def test_histogram2d_uniform(): """Test 2D histogram with uniform distribution""" # Create uniform 4x4 grid of points np_x = test_bind.np.array([0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5]) .. _example-stats-histogram-histogramdd-6: .. 
dropdown:: histogramdd (test_stats_missing.py:172) :class-title: example-dropdown .. code-block:: python :linenos: :lineno-start: 162 :emphasize-lines: 11 def test_histogramdd_2d(): """Test histogramdd with 2D data (should match histogram2d behavior)""" np_sample = test_bind.np.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8], [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]]) nc_sample = test_bind.numpycore.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8], [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]]) np_hist, np_edges = test_bind.np.histogramdd(np_sample, bins=5) nc_hist, nc_edges = test_bind.numpycore.histogramdd(nc_sample, bins=5) assert_arrays_equal(np_hist, nc_hist, "histogramdd_2d_hist") # Check edges for i in range(len(np_edges)): assert_arrays_equal(np_edges[i], nc_edges[i], f"histogramdd_2d_edges_{i}") def test_histogramdd_3d(): """Test histogramdd with 3D data (8 corner points of unit cube)""" np_sample = test_bind.np.array([