# Statistics Functions

Functions for computing statistics on arrays.

## Example

import numpycore as np

arr = np.array([1, 2, 3, 4, 5])

# Basic statistics
print(np.mean(arr))    # 3.0
print(np.std(arr))     # 1.4142...
print(np.var(arr))     # 2.0

# With axis parameter
mat = np.array([[1, 2], [3, 4]])
print(np.mean(mat, axis=0))  # [2., 3.]
print(np.mean(mat, axis=1))  # [1.5, 3.5]

## Order Statistics

Function

Description

Example

amin(a, axis)

Minimum of array

amax(a, axis)

Maximum of array

nanmin(a, axis)

Minimum ignoring NaNs

nanmax(a, axis)

Maximum ignoring NaNs

ptp(a, axis)

Range of values (max - min)

percentile(a, q, axis)

q-th percentile

nanpercentile(a, q, axis)

q-th percentile ignoring NaNs

quantile(a, q, axis)

q-th quantile

nanquantile(a, q, axis)

q-th quantile ignoring NaNs

## Averages and Variances

Function

Description

Example

mean(a, axis)

Arithmetic mean

View

average(a, axis, weights)

Weighted average

median(a, axis)

Median

View

nanmean(a, axis)

Mean ignoring NaNs

nanmedian(a, axis)

Median ignoring NaNs

std(a, axis, ddof)

Standard deviation

View

var(a, axis, ddof)

Variance

View

nanstd(a, axis, ddof)

Standard deviation ignoring NaNs

nanvar(a, axis, ddof)

Variance ignoring NaNs

## Sums and Products

Function

Description

Example

sum(a, axis)

Sum of array elements

View

prod(a, axis)

Product of array elements

nansum(a, axis)

Sum ignoring NaNs

nanprod(a, axis)

Product ignoring NaNs

cumsum(a, axis)

Cumulative sum

cumprod(a, axis)

Cumulative product

nancumsum(a, axis)

Cumulative sum ignoring NaNs

nancumprod(a, axis)

Cumulative product ignoring NaNs

## Correlations

Function

Description

Example

corrcoef(x, y)

Pearson correlation coefficients

cov(m, y)

Covariance matrix

correlate(a, v, mode)

Cross-correlation

## Histograms

Function

Description

Example

histogram(a, bins)

Compute histogram

histogram2d(x, y, bins)

Compute 2-D histogram

View

histogramdd(sample, bins)

Compute N-D histogram

View

bincount(x, weights)

Count occurrences of each value in an array of non-negative integers

digitize(x, bins)

Return the indices of the bins to which each value belongs

## Counting

Function

Description

Example

count_nonzero(a, axis)

Count non-zero elements

unique(ar)

Unique elements

## Code Examples

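The following examples are extracted from the test suite. Each example is labelled with the function name followed by the test file and line it refers to; the number at the start of each code line is that line's number in the test file.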

mean (test_bool_stats.py:60)
50        return False
51
52# ============================================================================
53# GROUP 1: mean() with bool scalars
54# ============================================================================
55
56def test_mean_all_true():
57    """Test mean([True, True, True]) = 1.0"""
58    arr = test_bind.np.array([True, True, True], dtype=bool)
59    np_result = test_bind.np.mean(arr)
60    nc_result = test_bind.numpycore.mean(arr)
61    assert_close(np_result, nc_result, "mean([T, T, T])")
62
63def test_mean_all_false():
64    """Test mean([False, False, False]) = 0.0"""
65    arr = test_bind.np.array([False, False, False], dtype=bool)
66    np_result = test_bind.np.mean(arr)
67    nc_result = test_bind.numpycore.mean(arr)
68    assert_close(np_result, nc_result, "mean([F, F, F])")
69
70def test_mean_mixed_75():
median (test_field_accessors.py:234)
224def test_field_median():
225    """Test median on field"""
226    # NumPy
227    np_arr = test_bind.np.zeros(51, dtype=[('score', 'f8')])
228    np_arr['score'] = test_bind.np.arange(51, dtype='f8')
229    np_median = test_bind.np.median(np_arr['score'])
230
231    # NumpyCore - use numpycore.median which should work on NumPy arrays
232    nc_arr = test_bind.numpycore.structured_zeros(51, dtype=[('score', 'f8')])
233    nc_arr['score'] = test_bind.np.arange(51, dtype='f8')
234    nc_median = test_bind.numpycore.median(nc_arr['score'])
235
236    assert_close(np_median, nc_median, "field_median")
237
238def test_field_multiple_stats():
239    """Test multiple statistics on same field"""
240    arr = test_bind.numpycore.structured_zeros(1000, dtype=[('temperature', 'f8')])
241    arr['temperature'] = test_bind.np.random.randn(1000) * 5 + 20
242
243    temp = arr['temperature']
244    mean_temp = test_bind.numpycore.mean(temp)
std (test_field_accessors.py:189)
179def test_field_std():
180    """Test standard deviation on field"""
181    # NumPy
182    np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')])
183    np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50
184    np_std = test_bind.np.std(np_arr['measurement'])
185
186    # NumpyCore
187    nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('measurement', 'f8')])
188    nc_arr['measurement'] = np_arr['measurement']
189    nc_std = test_bind.numpycore.std(nc_arr['measurement'])
190
191    assert_close(np_std, nc_std, "field_std", tol=1e-9)
192
193def test_field_min_max():
194    """Test min and max on field"""
195    # NumPy
196    np_arr = test_bind.np.zeros(50, dtype=[('value', 'f8')])
197    np_arr['value'] = test_bind.np.random.randn(50)
198    np_min = test_bind.np.min(np_arr['value'])
199    np_max = test_bind.np.max(np_arr['value'])
var (test_field_accessors.py:175)
165def test_field_variance():
166    """Test variance on field"""
167    # NumPy
168    np_arr = test_bind.np.zeros(100, dtype=[('data', 'f8')])
169    np_arr['data'] = test_bind.np.random.randn(100)
170    np_var = test_bind.np.var(np_arr['data'])
171
172    # NumpyCore
173    nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('data', 'f8')])
174    nc_arr['data'] = np_arr['data']  # Use same data
175    nc_var = test_bind.numpycore.var(nc_arr['data'])
176
177    assert_close(np_var, nc_var, "field_variance", tol=1e-9)
178
179def test_field_std():
180    """Test standard deviation on field"""
181    # NumPy
182    np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')])
183    np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50
184    np_std = test_bind.np.std(np_arr['measurement'])
sum (test_bool_stats.py:162)
153# ============================================================================
154# GROUP 4: sum() with bool
155# ============================================================================
156
157def test_sum_count_trues():
158    """Test sum([T, F, T, T]) = 3 (count True values)"""
159    arr = test_bind.np.array([True, False, True, True], dtype=bool)
160
161    np_result = test_bind.np.sum(arr)
162    nc_result = test_bind.numpycore.sum(arr)
163    assert_close(np_result, nc_result, "sum([T, F, T, T])")
164
165def test_sum_all_false():
166    """Test sum([F, F]) = 0"""
167    arr = test_bind.np.array([False, False], dtype=bool)
168
169    np_result = test_bind.np.sum(arr)
170    nc_result = test_bind.numpycore.sum(arr)
171    assert_close(np_result, nc_result, "sum([F, F])")
histogram2d (test_stats_missing.py:68)
59def test_histogram2d_basic():
60    """Test basic 2D histogram computation"""
61    np_x = test_bind.np.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2])
62    np_y = test_bind.np.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1])
63
64    nc_x = test_bind.numpycore.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2])
65    nc_y = test_bind.numpycore.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1])
66
67    np_hist, np_x_edges, np_y_edges = test_bind.np.histogram2d(np_x, np_y, bins=5)
68    nc_hist, nc_x_edges, nc_y_edges = test_bind.numpycore.histogram2d(nc_x, nc_y, bins=5)
69
70    assert_arrays_equal(np_hist, nc_hist, "histogram2d_basic_hist")
71    assert_arrays_equal(np_x_edges, nc_x_edges, "histogram2d_basic_x_edges")
72    assert_arrays_equal(np_y_edges, nc_y_edges, "histogram2d_basic_y_edges")
73
74
75def test_histogram2d_uniform():
76    """Test 2D histogram with uniform distribution"""
77    # Create uniform 4x4 grid of points
78    np_x = test_bind.np.array([0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5])
histogramdd (test_stats_missing.py:171)
162def test_histogramdd_2d():
163    """Test histogramdd with 2D data (should match histogram2d behavior)"""
164    np_sample = test_bind.np.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8],
165                                     [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]])
166
167    nc_sample = test_bind.numpycore.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8],
168                                            [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]])
169
170    np_hist, np_edges = test_bind.np.histogramdd(np_sample, bins=5)
171    nc_hist, nc_edges = test_bind.numpycore.histogramdd(nc_sample, bins=5)
172
173    assert_arrays_equal(np_hist, nc_hist, "histogramdd_2d_hist")
174    # Check edges
175    for i in range(len(np_edges)):
176        assert_arrays_equal(np_edges[i], nc_edges[i], f"histogramdd_2d_edges_{i}")
177
178
179def test_histogramdd_3d():
180    """Test histogramdd with 3D data (8 corner points of unit cube)"""
181    np_sample = test_bind.np.array([