Spaces:
Running
Running
Refactor plotting functions for clarity and modularity.
Browse files- probability/15_poisson_distribution.py +258 -222
probability/15_poisson_distribution.py
CHANGED
@@ -13,7 +13,7 @@
|
|
13 |
|
14 |
import marimo
|
15 |
|
16 |
-
__generated_with = "0.11.
|
17 |
app = marimo.App(width="medium", app_title="Poisson Distribution")
|
18 |
|
19 |
|
@@ -93,7 +93,7 @@ def _(mo):
|
|
93 |
|
94 |
@app.cell(hide_code=True)
|
95 |
def _(TangleSlider, mo):
|
96 |
-
#
|
97 |
lambda_slider = mo.ui.anywidget(TangleSlider(
|
98 |
amount=5,
|
99 |
min_value=0.1,
|
@@ -118,56 +118,60 @@ def _(TangleSlider, mo):
|
|
118 |
|
119 |
@app.cell(hide_code=True)
|
120 |
def _(lambda_slider, np, plt, stats):
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
|
128 |
-
|
129 |
-
|
130 |
-
_variance = _lambda # For Poisson, variance = lambda
|
131 |
-
_std_dev = np.sqrt(_variance)
|
132 |
|
133 |
-
|
134 |
-
|
135 |
|
136 |
-
|
137 |
-
|
138 |
|
139 |
-
|
140 |
-
|
|
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
153 |
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
arrowprops=dict(facecolor='black', shrink=0.05, width=1))
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
xytext=(_mean + 1, max(_pmf) * 0.6),
|
163 |
-
arrowprops=dict(facecolor='black', shrink=0.05, width=1))
|
164 |
|
165 |
-
|
166 |
-
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
171 |
|
172 |
|
173 |
@app.cell(hide_code=True)
|
@@ -190,39 +194,44 @@ def _(mo):
|
|
190 |
|
191 |
@app.cell(hide_code=True)
|
192 |
def _(fig_to_image, mo, plt):
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
220 |
_img = mo.image(fig_to_image(_fig), width="100%")
|
221 |
|
222 |
# explanation
|
223 |
_explanation = mo.md(
|
224 |
r"""
|
225 |
In this visualization:
|
|
|
226 |
- Each rectangle represents a 1-second interval
|
227 |
- Blue rectangles indicate intervals where an event occurred
|
228 |
- Red dots show the actual event times (2.75s and 7.12s)
|
@@ -230,7 +239,8 @@ def _(fig_to_image, mo, plt):
|
|
230 |
If we treat this as a binomial experiment with 60 trials (seconds), we can calculate probabilities using the binomial PMF. But there's a problem: what if multiple events occur within the same second? To address this, we can divide our minute into smaller intervals.
|
231 |
"""
|
232 |
)
|
233 |
-
|
|
|
234 |
|
235 |
|
236 |
@app.cell(hide_code=True)
|
@@ -266,39 +276,41 @@ def _(mo):
|
|
266 |
|
267 |
|
268 |
@app.cell(hide_code=True)
|
269 |
-
def _(
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
|
|
|
|
299 |
_img = mo.image(fig_to_image(_fig), width="100%")
|
300 |
|
301 |
-
#
|
302 |
_explanation = mo.md(
|
303 |
r"""
|
304 |
With $n=600$ and $p=\frac{5}{600}=\frac{1}{120}$, we can recalculate our probabilities:
|
@@ -312,7 +324,8 @@ def _(e, fig_to_image, mo, plt):
|
|
312 |
As we make our intervals smaller (increasing $n$), our approximation becomes more accurate.
|
313 |
"""
|
314 |
)
|
315 |
-
|
|
|
316 |
|
317 |
|
318 |
@app.cell(hide_code=True)
|
@@ -329,7 +342,6 @@ def _(mo):
|
|
329 |
|
330 |
@app.cell(hide_code=True)
|
331 |
def _(mo):
|
332 |
-
# slider for number of intervals
|
333 |
intervals_slider = mo.ui.slider(
|
334 |
start = 60,
|
335 |
stop = 10000,
|
@@ -347,47 +359,54 @@ def _(intervals_slider):
|
|
347 |
|
348 |
@app.cell(hide_code=True)
|
349 |
def _(intervals_slider, np, pd, plt, stats):
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
n = intervals_slider.value
|
352 |
_lambda = 5 # Fixed lambda for our example
|
353 |
-
p = _lambda / n
|
354 |
-
|
355 |
-
# Calculate the binomial probabilities
|
356 |
-
_x_values = np.arange(0, 15)
|
357 |
-
_binom_pmf = stats.binom.pmf(_x_values, n, p)
|
358 |
-
|
359 |
-
# Calculate the true Poisson probabilities
|
360 |
-
_poisson_pmf = stats.poisson.pmf(_x_values, _lambda)
|
361 |
-
|
362 |
-
# Create a DataFrame for comparison
|
363 |
-
df = pd.DataFrame({
|
364 |
-
'Events': _x_values,
|
365 |
-
f'Binomial(n={n}, p={p:.6f})': _binom_pmf,
|
366 |
-
f'Poisson(λ=5)': _poisson_pmf,
|
367 |
-
'Difference': np.abs(_binom_pmf - _poisson_pmf)
|
368 |
-
})
|
369 |
-
|
370 |
-
# Plot both PMFs
|
371 |
-
fig, _ax = plt.subplots(figsize=(10, 6))
|
372 |
-
|
373 |
-
# Bar plot for the binomial
|
374 |
-
_ax.bar(_x_values - 0.2, _binom_pmf, width=0.4, alpha=0.7,
|
375 |
-
color='royalblue', label=f'Binomial(n={n}, p={p:.6f})')
|
376 |
-
|
377 |
-
# Bar plot for the Poisson
|
378 |
-
_ax.bar(_x_values + 0.2, _poisson_pmf, width=0.4, alpha=0.7,
|
379 |
-
color='crimson', label='Poisson(λ=5)')
|
380 |
-
|
381 |
-
# Add labels and title
|
382 |
-
_ax.set_xlabel('Number of Events (k)')
|
383 |
-
_ax.set_ylabel('Probability')
|
384 |
-
_ax.set_title(f'Comparison of Binomial and Poisson PMFs with n={n}')
|
385 |
-
_ax.legend()
|
386 |
-
_ax.set_xticks(_x_values)
|
387 |
-
_ax.grid(alpha=0.3)
|
388 |
|
389 |
-
|
390 |
-
|
|
|
391 |
|
392 |
|
393 |
@app.cell(hide_code=True)
|
@@ -399,7 +418,7 @@ def _(df, fig, fig_to_image, mo, n, p):
|
|
399 |
'Difference': '{:.6f}'
|
400 |
})
|
401 |
|
402 |
-
# Calculate the
|
403 |
_max_diff = df['Difference'].max()
|
404 |
|
405 |
# output
|
@@ -498,7 +517,6 @@ def _(mo):
|
|
498 |
|
499 |
@app.cell
|
500 |
def _(stats):
|
501 |
-
# Set lambda parameter
|
502 |
_lambda = 5
|
503 |
|
504 |
# Calculate probabilities for X = 1, 2, 3
|
@@ -528,42 +546,46 @@ def _(mo):
|
|
528 |
|
529 |
@app.cell(hide_code=True)
|
530 |
def _(np, plt, stats):
|
531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
_lambda = 5
|
533 |
-
|
534 |
-
|
535 |
-
# theoretical PMF
|
536 |
-
_x_values = np.arange(0, max(_samples) + 1)
|
537 |
-
_pmf_values = stats.poisson.pmf(_x_values, _lambda)
|
538 |
-
|
539 |
-
# histograms to compare
|
540 |
-
_fig, _ax = plt.subplots(figsize=(10, 6))
|
541 |
-
|
542 |
-
# samples as a histogram
|
543 |
-
_ax.hist(_samples, bins=np.arange(-0.5, max(_samples) + 1.5, 1),
|
544 |
-
alpha=0.7, density=True, label='Random Samples')
|
545 |
-
|
546 |
-
# theoretical PMF
|
547 |
-
_ax.plot(_x_values, _pmf_values, 'ro-', label='Theoretical PMF')
|
548 |
-
|
549 |
-
# labels and title
|
550 |
-
_ax.set_xlabel('Number of Events')
|
551 |
-
_ax.set_ylabel('Relative Frequency / Probability')
|
552 |
-
_ax.set_title(f'1000 Random Samples from Poisson(λ={_lambda})')
|
553 |
-
_ax.legend()
|
554 |
-
_ax.grid(alpha=0.3)
|
555 |
-
|
556 |
-
# annotations
|
557 |
-
_ax.annotate(f'Sample Mean: {np.mean(_samples):.2f}',
|
558 |
-
xy=(0.7, 0.9), xycoords='axes fraction',
|
559 |
-
bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
|
560 |
-
_ax.annotate(f'Theoretical Mean: {_lambda:.2f}',
|
561 |
-
xy=(0.7, 0.8), xycoords='axes fraction',
|
562 |
-
bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))
|
563 |
-
|
564 |
-
plt.tight_layout()
|
565 |
-
plt.gca()
|
566 |
-
return
|
567 |
|
568 |
|
569 |
@app.cell(hide_code=True)
|
@@ -584,7 +606,6 @@ def _(mo):
|
|
584 |
|
585 |
@app.cell(hide_code=True)
|
586 |
def _(mo):
|
587 |
-
# sliders for the rate and time period
|
588 |
rate_slider = mo.ui.slider(
|
589 |
start = 0.1,
|
590 |
stop = 10,
|
@@ -608,59 +629,74 @@ def _(mo):
|
|
608 |
return controls, rate_slider, time_slider
|
609 |
|
610 |
|
|
|
|
|
|
|
|
|
|
|
|
|
611 |
@app.cell(hide_code=True)
|
612 |
def _(mo, np, plt, rate_slider, stats, time_slider):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
# parameters from sliders
|
614 |
_rate = rate_slider.value
|
615 |
_time = time_slider.value
|
616 |
|
617 |
-
#
|
618 |
-
|
619 |
-
|
620 |
-
# PMF for values
|
621 |
-
_max_x = max(30, int(_lambda * 1.5))
|
622 |
-
_x = np.arange(0, _max_x + 1)
|
623 |
-
_pmf = stats.poisson.pmf(_x, _lambda)
|
624 |
|
625 |
-
#
|
626 |
-
|
627 |
|
628 |
-
|
629 |
-
|
630 |
-
label=f'PMF: Poisson(λ={_lambda:.1f})')
|
631 |
-
|
632 |
-
# vertical line for mean
|
633 |
-
_ax.axvline(x=_lambda, color='red', linestyle='--', linewidth=2,
|
634 |
-
label=f'Mean = {_lambda:.1f}')
|
635 |
-
|
636 |
-
# labels and title
|
637 |
-
_ax.set_xlabel('Number of Events')
|
638 |
-
_ax.set_ylabel('Probability')
|
639 |
-
_ax.set_title(f'Poisson Distribution Over {_time} Units (Rate = {_rate}/unit)')
|
640 |
-
|
641 |
-
# better visualization if lambda is large
|
642 |
-
if _lambda > 10:
|
643 |
-
_ax.set_xlim(_lambda - 4*np.sqrt(_lambda), _lambda + 4*np.sqrt(_lambda))
|
644 |
-
|
645 |
-
_ax.legend()
|
646 |
-
_ax.grid(alpha=0.3)
|
647 |
-
|
648 |
-
plt.tight_layout()
|
649 |
-
plt.gca()
|
650 |
-
|
651 |
-
# additional information
|
652 |
-
info = mo.md(
|
653 |
-
f"""
|
654 |
-
When the rate is **{_rate}** events per unit time and we observe for **{_time}** units:
|
655 |
-
|
656 |
-
- The expected number of events is **{_lambda:.1f}**
|
657 |
-
- The variance is also **{_lambda:.1f}**
|
658 |
-
- The standard deviation is **{np.sqrt(_lambda):.2f}**
|
659 |
-
- P(X=0) = {stats.poisson.pmf(0, _lambda):.4f} (probability of no events)
|
660 |
-
- P(X≥10) = {1 - stats.poisson.cdf(9, _lambda):.4f} (probability of 10 or more events)
|
661 |
-
"""
|
662 |
-
)
|
663 |
-
return (info,)
|
664 |
|
665 |
|
666 |
@app.cell(hide_code=True)
|
|
|
13 |
|
14 |
import marimo
|
15 |
|
16 |
+
__generated_with = "0.11.25"
|
17 |
app = marimo.App(width="medium", app_title="Poisson Distribution")
|
18 |
|
19 |
|
|
|
93 |
|
94 |
@app.cell(hide_code=True)
|
95 |
def _(TangleSlider, mo):
|
96 |
+
# interactive elements using TangleSlider
|
97 |
lambda_slider = mo.ui.anywidget(TangleSlider(
|
98 |
amount=5,
|
99 |
min_value=0.1,
|
|
|
118 |
|
119 |
@app.cell(hide_code=True)
|
120 |
def _(lambda_slider, np, plt, stats):
|
121 |
+
def create_poisson_pmf_plot(lambda_value):
    """Plot the Poisson PMF for ``lambda_value`` with mean/variance callouts.

    The PMF is evaluated on k = 0 .. max(20, int(3 * lambda_value)) and drawn
    as bars with an overlaid line, a dashed line at the mean, and a shaded
    band of one standard deviation either side of the mean.

    Args:
        lambda_value: Rate parameter of the Poisson distribution.

    Returns:
        The current matplotlib axes, as given by ``plt.gca()``.
    """
    # Support to display: always at least 0..20, wider for large lambda.
    upper_k = max(20, int(lambda_value * 3))
    ks = np.arange(0, upper_k + 1)
    probs = stats.poisson.pmf(ks, lambda_value)

    # For a Poisson distribution the mean and the variance both equal lambda.
    mu = lambda_value
    var = lambda_value
    sigma = np.sqrt(var)

    _, axes = plt.subplots(figsize=(10, 6))

    # Bars first, then the connecting line, so the legend order is unchanged.
    axes.bar(ks, probs, color='royalblue', alpha=0.7, label='PMF: P(X=k)')
    axes.plot(ks, probs, 'ro-', alpha=0.6, label='PMF line')

    # Dashed marker at the mean and a shaded +/- one standard deviation band.
    axes.axvline(x=mu, color='green', linestyle='--', linewidth=2,
                 label=f'Mean: {mu:.2f}')
    axes.axvspan(mu - sigma, mu + sigma, alpha=0.2, color='green',
                 label=f'±1 Std Dev: {sigma:.2f}')

    axes.set_xlabel('Number of Events (k)')
    axes.set_ylabel('Probability: P(X=k)')
    axes.set_title(f'Poisson Distribution with λ={lambda_value:.1f}')

    # Both arrows point at x = mean; the second one targets half the PMF height
    # there so the two callouts do not overlap.
    pmf_at_mean = stats.poisson.pmf(int(mu), lambda_value)
    peak = max(probs)
    callouts = (
        (f'E[X] = {mu:.2f}', pmf_at_mean, 0.8),
        (f'Var(X) = {var:.2f}', pmf_at_mean / 2, 0.6),
    )
    for text, tip_y, text_height in callouts:
        axes.annotate(
            text,
            xy=(mu, tip_y),
            xytext=(mu + 1, peak * text_height),
            # A fresh dict per call, as in the original.
            arrowprops={'facecolor': 'black', 'shrink': 0.05, 'width': 1},
        )

    axes.grid(alpha=0.3)
    axes.legend()

    plt.tight_layout()
    return plt.gca()
|
170 |
|
171 |
+
# Get parameter from slider and create plot
|
172 |
+
_lambda = lambda_slider.amount
|
173 |
+
create_poisson_pmf_plot(_lambda)
|
174 |
+
return (create_poisson_pmf_plot,)
|
175 |
|
176 |
|
177 |
@app.cell(hide_code=True)
|
|
|
194 |
|
195 |
@app.cell(hide_code=True)
|
196 |
def _(fig_to_image, mo, plt):
|
197 |
+
def create_time_division_visualization():
    """Draw one minute as a strip of 60 one-second boxes with two sample events.

    Boxes whose one-second interval contains an event are drawn in blue, the
    rest in light gray; each event is also marked with a red dot.

    Returns:
        tuple: ``(fig, events, i)`` - the figure, the list of event times in
        seconds, and the last value taken by the box loop index.
    """
    figure, axis = plt.subplots(figsize=(12, 2))

    # Example events are hard-coded at 2.75s and 7.12s.
    event_times = [2.75, 7.12]
    # Index of the one-second box each (non-negative) event falls into.
    occupied = {int(t) for t in event_times}

    # One rectangle per second; 0.9 wide so neighbouring boxes stay separated.
    for i in range(60):
        fill = 'royalblue' if i in occupied else 'lightgray'
        axis.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=fill))

    # Red dot at each exact event time, vertically centred in the strip.
    for t in event_times:
        axis.plot(t, 0.5, 'ro', markersize=10)

    # Axis cosmetics: hide the y axis, label the x axis in seconds.
    axis.set_xlim(0, 60)
    axis.set_ylim(0, 1)
    axis.set_yticks([])
    axis.set_xticks([0, 15, 30, 45, 60])
    axis.set_xticklabels(['0s', '15s', '30s', '45s', '60s'])
    axis.set_xlabel('Time (seconds)')
    axis.set_title('One Minute Divided into 60 Second Intervals')

    plt.tight_layout()
    plt.gca()
    return figure, event_times, i
|
225 |
+
|
226 |
+
# Create visualization and convert to image
|
227 |
+
_fig, _events, i = create_time_division_visualization()
|
228 |
_img = mo.image(fig_to_image(_fig), width="100%")
|
229 |
|
230 |
# explanation
|
231 |
_explanation = mo.md(
|
232 |
r"""
|
233 |
In this visualization:
|
234 |
+
|
235 |
- Each rectangle represents a 1-second interval
|
236 |
- Blue rectangles indicate intervals where an event occurred
|
237 |
- Red dots show the actual event times (2.75s and 7.12s)
|
|
|
239 |
If we treat this as a binomial experiment with 60 trials (seconds), we can calculate probabilities using the binomial PMF. But there's a problem: what if multiple events occur within the same second? To address this, we can divide our minute into smaller intervals.
|
240 |
"""
|
241 |
)
|
242 |
+
mo.vstack([_fig, _explanation])
|
243 |
+
return create_time_division_visualization, i
|
244 |
|
245 |
|
246 |
@app.cell(hide_code=True)
|
|
|
276 |
|
277 |
|
278 |
@app.cell(hide_code=True)
|
279 |
+
def _(fig_to_image, mo, plt):
|
280 |
+
def create_decisecond_visualization(e_value):
    """Draw the first 100 of 600 decisecond intervals with the sample events.

    Only the first 10 seconds (100 deciseconds) are drawn for clarity. Boxes
    whose interval contains an event are blue, the rest light gray, and each
    visible event is marked with a red dot.

    Args:
        e_value: Not used by the drawing; kept so existing callers that pass
            it keep working.

    Returns:
        The matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(12, 2))

    # Example events at 2.75s and 7.12s, already expressed in deciseconds so
    # they share the unit of the x axis (0..100 deciseconds).
    events = [27.5, 71.2]

    # One rectangle per decisecond; 0.9 wide so neighbouring boxes stay apart.
    for i in range(100):
        has_event = any(i <= event_val < i + 1 for event_val in events)
        color = 'royalblue' if has_event else 'lightgray'
        ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=color))

    # Markers for events
    for event in events:
        if event < 100:  # Only show events in our visible range
            # Plot at the decisecond value itself. Dividing by 10 here would
            # put the dot at x=2.75 / 7.12 on a decisecond axis, away from
            # the highlighted boxes at 27-28 and 71-72.
            ax.plot(event, 0.5, 'ro', markersize=10)

    # Axis cosmetics: ticks sit every 20 deciseconds and are labelled in seconds.
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 1)
    ax.set_yticks([])
    ax.set_xticks([0, 20, 40, 60, 80, 100])
    ax.set_xticklabels(['0s', '2s', '4s', '6s', '8s', '10s'])
    ax.set_xlabel('Time (first 10 seconds shown)')
    ax.set_title('One Minute Divided into 600 Decisecond Intervals (first 100 shown)')

    plt.tight_layout()
    return fig
|
308 |
+
|
309 |
+
# Create viz and convert to image
|
310 |
+
_fig = create_decisecond_visualization(e_value=5)
|
311 |
_img = mo.image(fig_to_image(_fig), width="100%")
|
312 |
|
313 |
+
# Explanation
|
314 |
_explanation = mo.md(
|
315 |
r"""
|
316 |
With $n=600$ and $p=\frac{5}{600}=\frac{1}{120}$, we can recalculate our probabilities:
|
|
|
324 |
As we make our intervals smaller (increasing $n$), our approximation becomes more accurate.
|
325 |
"""
|
326 |
)
|
327 |
+
mo.vstack([_fig, _explanation])
|
328 |
+
return (create_decisecond_visualization,)
|
329 |
|
330 |
|
331 |
@app.cell(hide_code=True)
|
|
|
342 |
|
343 |
@app.cell(hide_code=True)
|
344 |
def _(mo):
|
|
|
345 |
intervals_slider = mo.ui.slider(
|
346 |
start = 60,
|
347 |
stop = 10000,
|
|
|
359 |
|
360 |
@app.cell(hide_code=True)
|
361 |
def _(intervals_slider, np, pd, plt, stats):
|
362 |
+
def create_comparison_plot(n, lambda_value):
    """Compare Binomial(n, lambda_value / n) with Poisson(lambda_value).

    Both PMFs are evaluated for k = 0..14, collected in a DataFrame together
    with their absolute difference, and drawn as side-by-side bars.

    Args:
        n: Number of binomial trials (intervals).
        lambda_value: Poisson rate; the binomial success probability is
            ``lambda_value / n``.

    Returns:
        tuple: ``(df, fig, n, p)`` - the comparison table, the figure, and
        the binomial parameters that were used.
    """
    # Per-interval success probability that keeps the expected count at lambda.
    p = lambda_value / n

    x_values = np.arange(0, 15)
    binom_pmf = stats.binom.pmf(x_values, n, p)
    poisson_pmf = stats.poisson.pmf(x_values, lambda_value)

    # Build each label once so the table column and the legend always agree.
    # The Poisson label follows the actual argument instead of a hard-coded 5;
    # the 'g' format renders 5 and 5.0 alike as "5", so the existing column
    # name 'Poisson(λ=5)' is unchanged for the notebook's call.
    binom_label = f'Binomial(n={n}, p={p:.6f})'
    poisson_label = f'Poisson(λ={lambda_value:g})'

    # DF for comparison
    df = pd.DataFrame({
        'Events': x_values,
        binom_label: binom_pmf,
        poisson_label: poisson_pmf,
        'Difference': np.abs(binom_pmf - poisson_pmf),
    })

    # Plot both PMFs as paired bars, offset by +/-0.2 around each integer k.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(x_values - 0.2, binom_pmf, width=0.4, alpha=0.7,
           color='royalblue', label=binom_label)
    ax.bar(x_values + 0.2, poisson_pmf, width=0.4, alpha=0.7,
           color='crimson', label=poisson_label)

    # Labels and title
    ax.set_xlabel('Number of Events (k)')
    ax.set_ylabel('Probability')
    ax.set_title(f'Comparison of Binomial and Poisson PMFs with n={n}')
    ax.legend()
    ax.set_xticks(x_values)
    ax.grid(alpha=0.3)

    plt.tight_layout()
    return df, fig, n, p
|
402 |
+
|
403 |
+
# Number of intervals from the slider
|
404 |
n = intervals_slider.value
|
405 |
_lambda = 5 # Fixed lambda for our example
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
|
407 |
+
# Comparison plot
|
408 |
+
df, fig, n, p = create_comparison_plot(n, _lambda)
|
409 |
+
return create_comparison_plot, df, fig, n, p
|
410 |
|
411 |
|
412 |
@app.cell(hide_code=True)
|
|
|
418 |
'Difference': '{:.6f}'
|
419 |
})
|
420 |
|
421 |
+
# Calculate the max absolute difference
|
422 |
_max_diff = df['Difference'].max()
|
423 |
|
424 |
# output
|
|
|
517 |
|
518 |
@app.cell
|
519 |
def _(stats):
|
|
|
520 |
_lambda = 5
|
521 |
|
522 |
# Calculate probabilities for X = 1, 2, 3
|
|
|
546 |
|
547 |
@app.cell(hide_code=True)
|
548 |
def _(np, plt, stats):
|
549 |
+
def create_samples_plot(lambda_value, sample_size=1000):
    """Plot a histogram of random Poisson draws against the theoretical PMF.

    Args:
        lambda_value: Rate parameter of the Poisson distribution.
        sample_size: Number of random samples to draw (default 1000).

    Returns:
        The current matplotlib axes, as given by ``plt.gca()``.
    """
    # Random samples
    samples = stats.poisson.rvs(lambda_value, size=sample_size)
    largest = samples.max()

    # Theoretical PMF over the observed range 0..largest sample.
    x_values = np.arange(0, largest + 1)
    pmf_values = stats.poisson.pmf(x_values, lambda_value)

    fig, ax = plt.subplots(figsize=(10, 6))

    # Unit-width bins centred on the integers; density=True makes the bar
    # heights relative frequencies, directly comparable with the PMF.
    ax.hist(samples, bins=np.arange(-0.5, largest + 1.5, 1),
            alpha=0.7, density=True, label='Random Samples')

    # theoretical PMF
    ax.plot(x_values, pmf_values, 'ro-', label='Theoretical PMF')

    # labels and title; the title reports the actual number of samples drawn
    # rather than a fixed "1000" (identical text for the default sample_size).
    ax.set_xlabel('Number of Events')
    ax.set_ylabel('Relative Frequency / Probability')
    ax.set_title(f'{sample_size} Random Samples from Poisson(λ={lambda_value})')
    ax.legend()
    ax.grid(alpha=0.3)

    # Text boxes positioned in axes-fraction coordinates (upper right area).
    ax.annotate(f'Sample Mean: {np.mean(samples):.2f}',
                xy=(0.7, 0.9), xycoords='axes fraction',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
    ax.annotate(f'Theoretical Mean: {lambda_value:.2f}',
                xy=(0.7, 0.8), xycoords='axes fraction',
                bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))

    plt.tight_layout()
    return plt.gca()
|
584 |
+
|
585 |
+
# Use a lambda value of 5 for this example
|
586 |
_lambda = 5
|
587 |
+
create_samples_plot(_lambda)
|
588 |
+
return (create_samples_plot,)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
589 |
|
590 |
|
591 |
@app.cell(hide_code=True)
|
|
|
606 |
|
607 |
@app.cell(hide_code=True)
|
608 |
def _(mo):
|
|
|
609 |
rate_slider = mo.ui.slider(
|
610 |
start = 0.1,
|
611 |
stop = 10,
|
|
|
629 |
return controls, rate_slider, time_slider
|
630 |
|
631 |
|
632 |
+
@app.cell
|
633 |
+
def _(controls):
|
634 |
+
controls.center()
|
635 |
+
return
|
636 |
+
|
637 |
+
|
638 |
@app.cell(hide_code=True)
|
639 |
def _(mo, np, plt, rate_slider, stats, time_slider):
|
640 |
+
def create_time_scaling_plot(rate, time_period):
    """Plot the Poisson PMF for ``rate * time_period`` and summarise it as text.

    Args:
        rate: Events per unit time.
        time_period: Number of time units observed.

    Returns:
        tuple: ``(axes, info_text)`` - the current matplotlib axes from
        ``plt.gca()`` and a markdown-formatted summary string.
    """
    # Scaling the observation window scales the Poisson rate parameter.
    lambda_value = rate * time_period

    # Evaluate the PMF on 0..max(30, int(1.5 * lambda)).
    last_k = max(30, int(lambda_value * 1.5))
    ks = np.arange(0, last_k + 1)
    probs = stats.poisson.pmf(ks, lambda_value)

    _, axes = plt.subplots(figsize=(10, 6))

    # PMF bars plus a dashed vertical line at the mean.
    axes.bar(ks, probs, color='royalblue', alpha=0.7,
             label=f'PMF: Poisson(λ={lambda_value:.1f})')
    axes.axvline(x=lambda_value, color='red', linestyle='--', linewidth=2,
                 label=f'Mean = {lambda_value:.1f}')

    axes.set_xlabel('Number of Events')
    axes.set_ylabel('Probability')
    axes.set_title(f'Poisson Distribution Over {time_period} Units (Rate = {rate}/unit)')

    # For large lambda, zoom to four standard deviations around the mean.
    if lambda_value > 10:
        half_width = 4*np.sqrt(lambda_value)
        axes.set_xlim(lambda_value - half_width, lambda_value + half_width)

    axes.legend()
    axes.grid(alpha=0.3)

    plt.tight_layout()

    # Markdown summary of the key quantities for the chosen rate and window.
    info_text = f"""
    When the rate is **{rate}** events per unit time and we observe for **{time_period}** units:

    - The expected number of events is **{lambda_value:.1f}**
    - The variance is also **{lambda_value:.1f}**
    - The standard deviation is **{np.sqrt(lambda_value):.2f}**
    - P(X=0) = {stats.poisson.pmf(0, lambda_value):.4f} (probability of no events)
    - P(X≥10) = {1 - stats.poisson.cdf(9, lambda_value):.4f} (probability of 10 or more events)
    """

    return plt.gca(), info_text
|
687 |
+
|
688 |
# parameters from sliders
|
689 |
_rate = rate_slider.value
|
690 |
_time = time_slider.value
|
691 |
|
692 |
+
# store
|
693 |
+
_plot, _info_text = create_time_scaling_plot(_rate, _time)
|
|
|
|
|
|
|
|
|
|
|
694 |
|
695 |
+
# Display info as markdown
|
696 |
+
info = mo.md(_info_text)
|
697 |
|
698 |
+
mo.vstack([_plot, info], justify="center")
|
699 |
+
return create_time_scaling_plot, info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
700 |
|
701 |
|
702 |
@app.cell(hide_code=True)
|