Statistics Functions#
Functions for computing statistics on arrays.
Example#
import numpycore as np
arr = np.array([1, 2, 3, 4, 5])
# Basic statistics
print(np.mean(arr)) # 3.0
print(np.std(arr)) # 1.4142...
print(np.var(arr)) # 2.0
# With axis parameter
mat = np.array([[1, 2], [3, 4]])
print(np.mean(mat, axis=0)) # [2., 3.]
print(np.mean(mat, axis=1)) # [1.5, 3.5]
Order Statistics#
| Function | Description | Example |
|---|---|---|
| amin(a, axis) | Minimum of array | |
| amax(a, axis) | Maximum of array | |
| nanmin(a, axis) | Minimum ignoring NaNs | |
| nanmax(a, axis) | Maximum ignoring NaNs | |
| ptp(a, axis) | Range of values (max - min) | |
| percentile(a, q, axis) | q-th percentile | |
| nanpercentile(a, q, axis) | q-th percentile ignoring NaNs | |
| quantile(a, q, axis) | q-th quantile | |
| nanquantile(a, q, axis) | q-th quantile ignoring NaNs | |
Averages and Variances#
| Function | Description | Example |
|---|---|---|
| mean(a, axis) | Arithmetic mean | |
| average(a, axis, weights) | Weighted average | |
| median(a, axis) | Median | |
| nanmean(a, axis) | Mean ignoring NaNs | |
| nanmedian(a, axis) | Median ignoring NaNs | |
| std(a, axis, ddof) | Standard deviation | |
| var(a, axis, ddof) | Variance | |
| nanstd(a, axis, ddof) | Std ignoring NaNs | |
| nanvar(a, axis, ddof) | Variance ignoring NaNs | |
Sums and Products#
| Function | Description | Example |
|---|---|---|
| sum(a, axis) | Sum of array elements | |
| prod(a, axis) | Product of array elements | |
| nansum(a, axis) | Sum ignoring NaNs | |
| nanprod(a, axis) | Product ignoring NaNs | |
| cumsum(a, axis) | Cumulative sum | |
| cumprod(a, axis) | Cumulative product | |
| nancumsum(a, axis) | Cumulative sum ignoring NaNs | |
| nancumprod(a, axis) | Cumulative product ignoring NaNs | |
Correlations#
| Function | Description | Example |
|---|---|---|
| corrcoef(x, y) | Pearson correlation coefficients | |
| cov(m, y) | Covariance matrix | |
| correlate(a, v, mode) | Cross-correlation | |
Histograms#
See the histogram2d and histogramdd examples under Code Examples below.
Counting#
| Function | Description | Example |
|---|---|---|
| count_nonzero(a, axis) | Count non-zero elements | |
| unique(ar) | Unique elements | |
Code Examples#
The following examples are extracted from the test suite.
mean (test_bool_stats.py:60)
50 return False
51
52# ============================================================================
53# GROUP 1: mean() with bool scalars
54# ============================================================================
55
56def test_mean_all_true():
57 """Test mean([True, True, True]) = 1.0"""
58 arr = test_bind.np.array([True, True, True], dtype=bool)
59 np_result = test_bind.np.mean(arr)
60 nc_result = test_bind.numpycore.mean(arr)
61 assert_close(np_result, nc_result, "mean([T, T, T])")
62
63def test_mean_all_false():
64 """Test mean([False, False, False]) = 0.0"""
65 arr = test_bind.np.array([False, False, False], dtype=bool)
66 np_result = test_bind.np.mean(arr)
67 nc_result = test_bind.numpycore.mean(arr)
68 assert_close(np_result, nc_result, "mean([F, F, F])")
69
70def test_mean_mixed_75():
median (test_field_accessors.py:234)
224def test_field_median():
225 """Test median on field"""
226 # NumPy
227 np_arr = test_bind.np.zeros(51, dtype=[('score', 'f8')])
228 np_arr['score'] = test_bind.np.arange(51, dtype='f8')
229 np_median = test_bind.np.median(np_arr['score'])
230
231 # NumpyCore - use numpycore.median which should work on NumPy arrays
232 nc_arr = test_bind.numpycore.structured_zeros(51, dtype=[('score', 'f8')])
233 nc_arr['score'] = test_bind.np.arange(51, dtype='f8')
234 nc_median = test_bind.numpycore.median(nc_arr['score'])
235
236 assert_close(np_median, nc_median, "field_median")
237
238def test_field_multiple_stats():
239 """Test multiple statistics on same field"""
240 arr = test_bind.numpycore.structured_zeros(1000, dtype=[('temperature', 'f8')])
241 arr['temperature'] = test_bind.np.random.randn(1000) * 5 + 20
242
243 temp = arr['temperature']
244 mean_temp = test_bind.numpycore.mean(temp)
std (test_field_accessors.py:189)
179def test_field_std():
180 """Test standard deviation on field"""
181 # NumPy
182 np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')])
183 np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50
184 np_std = test_bind.np.std(np_arr['measurement'])
185
186 # NumpyCore
187 nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('measurement', 'f8')])
188 nc_arr['measurement'] = np_arr['measurement']
189 nc_std = test_bind.numpycore.std(nc_arr['measurement'])
190
191 assert_close(np_std, nc_std, "field_std", tol=1e-9)
192
193def test_field_min_max():
194 """Test min and max on field"""
195 # NumPy
196 np_arr = test_bind.np.zeros(50, dtype=[('value', 'f8')])
197 np_arr['value'] = test_bind.np.random.randn(50)
198 np_min = test_bind.np.min(np_arr['value'])
199 np_max = test_bind.np.max(np_arr['value'])
var (test_field_accessors.py:175)
165def test_field_variance():
166 """Test variance on field"""
167 # NumPy
168 np_arr = test_bind.np.zeros(100, dtype=[('data', 'f8')])
169 np_arr['data'] = test_bind.np.random.randn(100)
170 np_var = test_bind.np.var(np_arr['data'])
171
172 # NumpyCore
173 nc_arr = test_bind.numpycore.structured_zeros(100, dtype=[('data', 'f8')])
174 nc_arr['data'] = np_arr['data'] # Use same data
175 nc_var = test_bind.numpycore.var(nc_arr['data'])
176
177 assert_close(np_var, nc_var, "field_variance", tol=1e-9)
178
179def test_field_std():
180 """Test standard deviation on field"""
181 # NumPy
182 np_arr = test_bind.np.zeros(100, dtype=[('measurement', 'f8')])
183 np_arr['measurement'] = test_bind.np.random.randn(100) * 10 + 50
184 np_std = test_bind.np.std(np_arr['measurement'])
sum (test_bool_stats.py:163)
153# ============================================================================
154# GROUP 4: sum() with bool
155# ============================================================================
156
157def test_sum_count_trues():
158 """Test sum([T, F, T, T]) = 3 (count True values)"""
159 arr = test_bind.np.array([True, False, True, True], dtype=bool)
160
161 np_result = test_bind.np.sum(arr)
162 nc_result = test_bind.numpycore.sum(arr)
163 assert_close(np_result, nc_result, "sum([T, F, T, T])")
164
165def test_sum_all_false():
166 """Test sum([F, F]) = 0"""
167 arr = test_bind.np.array([False, False], dtype=bool)
168
169 np_result = test_bind.np.sum(arr)
170 nc_result = test_bind.numpycore.sum(arr)
171 assert_close(np_result, nc_result, "sum([F, F])")
histogram2d (test_stats_missing.py:69)
59def test_histogram2d_basic():
60 """Test basic 2D histogram computation"""
61 np_x = test_bind.np.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2])
62 np_y = test_bind.np.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1])
63
64 nc_x = test_bind.numpycore.array([1.0, 2.0, 1.5, 3.0, 2.5, 4.0, 3.5, 1.2])
65 nc_y = test_bind.numpycore.array([1.0, 1.5, 2.0, 1.8, 2.2, 1.3, 2.5, 1.1])
66
67 np_hist, np_x_edges, np_y_edges = test_bind.np.histogram2d(np_x, np_y, bins=5)
68 nc_hist, nc_x_edges, nc_y_edges = test_bind.numpycore.histogram2d(nc_x, nc_y, bins=5)
69
70 assert_arrays_equal(np_hist, nc_hist, "histogram2d_basic_hist")
71 assert_arrays_equal(np_x_edges, nc_x_edges, "histogram2d_basic_x_edges")
72 assert_arrays_equal(np_y_edges, nc_y_edges, "histogram2d_basic_y_edges")
73
74
75def test_histogram2d_uniform():
76 """Test 2D histogram with uniform distribution"""
77 # Create uniform 4x4 grid of points
78 np_x = test_bind.np.array([0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5, 0.5, 1.5, 2.5, 3.5])
histogramdd (test_stats_missing.py:172)
162def test_histogramdd_2d():
163 """Test histogramdd with 2D data (should match histogram2d behavior)"""
164 np_sample = test_bind.np.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8],
165 [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]])
166
167 nc_sample = test_bind.numpycore.array([[1.0, 1.0], [2.0, 1.5], [1.5, 2.0], [3.0, 1.8],
168 [2.5, 2.2], [4.0, 1.3], [3.5, 2.5], [1.2, 1.1]])
169
170 np_hist, np_edges = test_bind.np.histogramdd(np_sample, bins=5)
171 nc_hist, nc_edges = test_bind.numpycore.histogramdd(nc_sample, bins=5)
172
173 assert_arrays_equal(np_hist, nc_hist, "histogramdd_2d_hist")
174 # Check edges
175 for i in range(len(np_edges)):
176 assert_arrays_equal(np_edges[i], nc_edges[i], f"histogramdd_2d_edges_{i}")
177
178
179def test_histogramdd_3d():
180 """Test histogramdd with 3D data (8 corner points of unit cube)"""
181 np_sample = test_bind.np.array([