English: sample mean and variance of IID samples from a standard Cauchy distribution.
```python
import numpy as np
import matplotlib.pyplot as plt
def running_sample_variance(data):
"""
Welford's online algorithm to calculate the variance.
In each iteration, the mean and M2 (the sum of the squares of differences
from the mean) are updated.
For each column i, the difference delta between the current data point and
the previous mean is calculated, and the mean and M2 are updated accordingly.
variance = M2 / np.arange(1, m + 1).
"""
n, m = data.shape
mean = np.zeros((n, m))
M2 = np.zeros((n, m))
for i in range(m):
if i == 0:
delta = data[:, i] - mean[:, i]
mean[:, i] = mean[:, i] + delta / (i + 1)
M2[:, i] = delta * (data[:, i] - mean[:, i])
else:
delta = data[:, i] - mean[:, i - 1]
mean[:, i] = mean[:, i - 1] + delta / (i + 1)
M2[:, i] = M2[:, i - 1] + delta * (data[:, i] - mean[:, i])
variance_n = M2 / (np.arange(1, m + 1))
return variance_n
def running_sample_average(data):
cumsum = np.cumsum(data, axis=1)
cum_n = np.arange(1, data.shape[1] + 1)
return cumsum / cum_n
- Parameters
n = 10 # Number of rows
m = 500000 # Number of columns
- Generate Cauchy-distributed variables
data = np.random.standard_cauchy(size=(n, m))
variances = running_sample_variance(data)
mean = running_sample_average(data)
- Plot the variances
fig, (ax2, ax1) = plt.subplots(1, 2, figsize=(16, 5))
- Plot the variances in the first subplot
x = np.arange(1, variances.shape[1] + 1)
ax1.semilogy(variances.T, linewidth=1)
ax1.set_xlabel('Number of Samples')
ax1.set_ylabel('Variance')
ax1.set_title('Running Sample Variances')
- Plot the running sample averages in the second subplot
ax2.semilogy(np.abs(mean.T), linewidth=1)
ax2.set_xlabel('Number of Samples')
ax2.set_ylabel('Average (absolute value)')
ax2.set_title('Running Sample Averages')
- Adjust the layout and display the plot
plt.tight_layout()
plt.show()
```