Haleshot committed on
Commit
a651a65
·
unverified ·
1 Parent(s): 5e6566d

Refactor plotting functions for clarity and modularity.

Browse files
Files changed (1) hide show
  1. probability/15_poisson_distribution.py +258 -222
probability/15_poisson_distribution.py CHANGED
@@ -13,7 +13,7 @@
13
 
14
  import marimo
15
 
16
- __generated_with = "0.11.24"
17
  app = marimo.App(width="medium", app_title="Poisson Distribution")
18
 
19
 
@@ -93,7 +93,7 @@ def _(mo):
93
 
94
  @app.cell(hide_code=True)
95
  def _(TangleSlider, mo):
96
- # Create interactive elements using TangleSlider
97
  lambda_slider = mo.ui.anywidget(TangleSlider(
98
  amount=5,
99
  min_value=0.1,
@@ -118,56 +118,60 @@ def _(TangleSlider, mo):
118
 
119
  @app.cell(hide_code=True)
120
  def _(lambda_slider, np, plt, stats):
121
- _lambda = lambda_slider.amount
 
 
 
 
 
122
 
123
- # PMF for values
124
- _max_x = max(20, int(_lambda * 3)) # Show at least up to 3*lambda
125
- _x = np.arange(0, _max_x + 1)
126
- _pmf = stats.poisson.pmf(_x, _lambda)
127
 
128
- # Relevant key statistics
129
- _mean = _lambda # For Poisson, mean = lambda
130
- _variance = _lambda # For Poisson, variance = lambda
131
- _std_dev = np.sqrt(_variance)
132
 
133
- # plot
134
- _fig, _ax = plt.subplots(figsize=(10, 6))
135
 
136
- # PMF as bars
137
- _ax.bar(_x, _pmf, color='royalblue', alpha=0.7, label=f'PMF: P(X=k)')
138
 
139
- # for the PMF values
140
- _ax.plot(_x, _pmf, 'ro-', alpha=0.6, label='PMF line')
 
141
 
142
- # Vertical lines - mean and key values
143
- _ax.axvline(x=_mean, color='green', linestyle='--', linewidth=2,
144
- label=f'Mean: {_mean:.2f}')
145
 
146
- # Stdev region
147
- _ax.axvspan(_mean - _std_dev, _mean + _std_dev, alpha=0.2, color='green',
148
- label=f'±1 Std Dev: {_std_dev:.2f}')
149
 
150
- _ax.set_xlabel('Number of Events (k)')
151
- _ax.set_ylabel('Probability: P(X=k)')
152
- _ax.set_title(f'Poisson Distribution with λ={_lambda:.1f}')
 
 
153
 
154
- # annotations
155
- _ax.annotate(f'E[X] = {_mean:.2f}',
156
- xy=(_mean, stats.poisson.pmf(int(_mean), _lambda)),
157
- xytext=(_mean + 1, max(_pmf) * 0.8),
158
- arrowprops=dict(facecolor='black', shrink=0.05, width=1))
159
 
160
- _ax.annotate(f'Var(X) = {_variance:.2f}',
161
- xy=(_mean, stats.poisson.pmf(int(_mean), _lambda) / 2),
162
- xytext=(_mean + 1, max(_pmf) * 0.6),
163
- arrowprops=dict(facecolor='black', shrink=0.05, width=1))
164
 
165
- _ax.grid(alpha=0.3)
166
- _ax.legend()
167
 
168
- plt.tight_layout()
169
- plt.gca()
170
- return
 
171
 
172
 
173
  @app.cell(hide_code=True)
@@ -190,39 +194,44 @@ def _(mo):
190
 
191
  @app.cell(hide_code=True)
192
  def _(fig_to_image, mo, plt):
193
- # Create a visualization of dividing a minute into 60 seconds
194
- _fig, _ax = plt.subplots(figsize=(12, 2))
195
-
196
- # Example events at 2.75s and 7.12s
197
- _events = [2.75, 7.12]
198
-
199
- # Create an array of 60 rectangles
200
- for i in range(60):
201
- _color = 'royalblue' if any(i <= e < i+1 for e in _events) else 'lightgray'
202
- _ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=_color))
203
-
204
- # markers for events
205
- for e in _events:
206
- _ax.plot(e, 0.5, 'ro', markersize=10)
207
-
208
- # labels
209
- _ax.set_xlim(0, 60)
210
- _ax.set_ylim(0, 1)
211
- _ax.set_yticks([])
212
- _ax.set_xticks([0, 15, 30, 45, 60])
213
- _ax.set_xticklabels(['0s', '15s', '30s', '45s', '60s'])
214
- _ax.set_xlabel('Time (seconds)')
215
- _ax.set_title('One Minute Divided into 60 Second Intervals')
216
-
217
- plt.tight_layout()
218
-
219
- # Convert plot to image for display
 
 
 
 
220
  _img = mo.image(fig_to_image(_fig), width="100%")
221
 
222
  # explanation
223
  _explanation = mo.md(
224
  r"""
225
  In this visualization:
 
226
  - Each rectangle represents a 1-second interval
227
  - Blue rectangles indicate intervals where an event occurred
228
  - Red dots show the actual event times (2.75s and 7.12s)
@@ -230,7 +239,8 @@ def _(fig_to_image, mo, plt):
230
  If we treat this as a binomial experiment with 60 trials (seconds), we can calculate probabilities using the binomial PMF. But there's a problem: what if multiple events occur within the same second? To address this, we can divide our minute into smaller intervals.
231
  """
232
  )
233
- return e, i
 
234
 
235
 
236
  @app.cell(hide_code=True)
@@ -266,39 +276,41 @@ def _(mo):
266
 
267
 
268
  @app.cell(hide_code=True)
269
- def _(e, fig_to_image, mo, plt):
270
- # Create a visualization of dividing a minute into 600 deciseconds
271
- # (Just showing the first 100 for clarity)
272
- _fig, _ax = plt.subplots(figsize=(12, 2))
273
-
274
- # Example events at 2.75s and 7.12s (convert to deciseconds)
275
- _events = [27.5, 71.2]
276
-
277
- # Create a representative portion of the 600 rectangles (first 100)
278
- for _i in range(100):
279
- _color = 'royalblue' if any(_i <= _e < _i + 1 for _e in _events) else 'lightgray'
280
- _ax.add_patch(plt.Rectangle((_i, 0), 0.9, 1, color=_color))
281
-
282
- # Add markers for events
283
- for _e in _events:
284
- if _e < 100: # Only show events in our visible range
285
- _ax.plot(e, 0.5, 'ro', markersize=10)
286
-
287
- # Add labels
288
- _ax.set_xlim(0, 100)
289
- _ax.set_ylim(0, 1)
290
- _ax.set_yticks([])
291
- _ax.set_xticks([0, 20, 40, 60, 80, 100])
292
- _ax.set_xticklabels(['0s', '2s', '4s', '6s', '8s', '10s'])
293
- _ax.set_xlabel('Time (first 10 seconds shown)')
294
- _ax.set_title('One Minute Divided into 600 Decisecond Intervals (first 100 shown)')
295
-
296
- plt.tight_layout()
297
-
298
- # Convert plot to image for display
 
 
299
  _img = mo.image(fig_to_image(_fig), width="100%")
300
 
301
- # Add explanation
302
  _explanation = mo.md(
303
  r"""
304
  With $n=600$ and $p=\frac{5}{600}=\frac{1}{120}$, we can recalculate our probabilities:
@@ -312,7 +324,8 @@ def _(e, fig_to_image, mo, plt):
312
  As we make our intervals smaller (increasing $n$), our approximation becomes more accurate.
313
  """
314
  )
315
- return
 
316
 
317
 
318
  @app.cell(hide_code=True)
@@ -329,7 +342,6 @@ def _(mo):
329
 
330
  @app.cell(hide_code=True)
331
  def _(mo):
332
- # slider for number of intervals
333
  intervals_slider = mo.ui.slider(
334
  start = 60,
335
  stop = 10000,
@@ -347,47 +359,54 @@ def _(intervals_slider):
347
 
348
  @app.cell(hide_code=True)
349
  def _(intervals_slider, np, pd, plt, stats):
350
- # number of intervals from the slider
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  n = intervals_slider.value
352
  _lambda = 5 # Fixed lambda for our example
353
- p = _lambda / n
354
-
355
- # Calculate the binomial probabilities
356
- _x_values = np.arange(0, 15)
357
- _binom_pmf = stats.binom.pmf(_x_values, n, p)
358
-
359
- # Calculate the true Poisson probabilities
360
- _poisson_pmf = stats.poisson.pmf(_x_values, _lambda)
361
-
362
- # Create a DataFrame for comparison
363
- df = pd.DataFrame({
364
- 'Events': _x_values,
365
- f'Binomial(n={n}, p={p:.6f})': _binom_pmf,
366
- f'Poisson(λ=5)': _poisson_pmf,
367
- 'Difference': np.abs(_binom_pmf - _poisson_pmf)
368
- })
369
-
370
- # Plot both PMFs
371
- fig, _ax = plt.subplots(figsize=(10, 6))
372
-
373
- # Bar plot for the binomial
374
- _ax.bar(_x_values - 0.2, _binom_pmf, width=0.4, alpha=0.7,
375
- color='royalblue', label=f'Binomial(n={n}, p={p:.6f})')
376
-
377
- # Bar plot for the Poisson
378
- _ax.bar(_x_values + 0.2, _poisson_pmf, width=0.4, alpha=0.7,
379
- color='crimson', label='Poisson(λ=5)')
380
-
381
- # Add labels and title
382
- _ax.set_xlabel('Number of Events (k)')
383
- _ax.set_ylabel('Probability')
384
- _ax.set_title(f'Comparison of Binomial and Poisson PMFs with n={n}')
385
- _ax.legend()
386
- _ax.set_xticks(_x_values)
387
- _ax.grid(alpha=0.3)
388
 
389
- plt.tight_layout()
390
- return df, fig, n, p
 
391
 
392
 
393
  @app.cell(hide_code=True)
@@ -399,7 +418,7 @@ def _(df, fig, fig_to_image, mo, n, p):
399
  'Difference': '{:.6f}'
400
  })
401
 
402
- # Calculate the maximum absolute difference
403
  _max_diff = df['Difference'].max()
404
 
405
  # output
@@ -498,7 +517,6 @@ def _(mo):
498
 
499
  @app.cell
500
  def _(stats):
501
- # Set lambda parameter
502
  _lambda = 5
503
 
504
  # Calculate probabilities for X = 1, 2, 3
@@ -528,42 +546,46 @@ def _(mo):
528
 
529
  @app.cell(hide_code=True)
530
  def _(np, plt, stats):
531
- # 1000 random samples from Poisson(lambda=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  _lambda = 5
533
- _samples = stats.poisson.rvs(_lambda, size=1000)
534
-
535
- # theoretical PMF
536
- _x_values = np.arange(0, max(_samples) + 1)
537
- _pmf_values = stats.poisson.pmf(_x_values, _lambda)
538
-
539
- # histograms to compare
540
- _fig, _ax = plt.subplots(figsize=(10, 6))
541
-
542
- # samples as a histogram
543
- _ax.hist(_samples, bins=np.arange(-0.5, max(_samples) + 1.5, 1),
544
- alpha=0.7, density=True, label='Random Samples')
545
-
546
- # theoretical PMF
547
- _ax.plot(_x_values, _pmf_values, 'ro-', label='Theoretical PMF')
548
-
549
- # labels and title
550
- _ax.set_xlabel('Number of Events')
551
- _ax.set_ylabel('Relative Frequency / Probability')
552
- _ax.set_title(f'1000 Random Samples from Poisson(λ={_lambda})')
553
- _ax.legend()
554
- _ax.grid(alpha=0.3)
555
-
556
- # annotations
557
- _ax.annotate(f'Sample Mean: {np.mean(_samples):.2f}',
558
- xy=(0.7, 0.9), xycoords='axes fraction',
559
- bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
560
- _ax.annotate(f'Theoretical Mean: {_lambda:.2f}',
561
- xy=(0.7, 0.8), xycoords='axes fraction',
562
- bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))
563
-
564
- plt.tight_layout()
565
- plt.gca()
566
- return
567
 
568
 
569
  @app.cell(hide_code=True)
@@ -584,7 +606,6 @@ def _(mo):
584
 
585
  @app.cell(hide_code=True)
586
  def _(mo):
587
- # sliders for the rate and time period
588
  rate_slider = mo.ui.slider(
589
  start = 0.1,
590
  stop = 10,
@@ -608,59 +629,74 @@ def _(mo):
608
  return controls, rate_slider, time_slider
609
 
610
 
 
 
 
 
 
 
611
  @app.cell(hide_code=True)
612
  def _(mo, np, plt, rate_slider, stats, time_slider):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  # parameters from sliders
614
  _rate = rate_slider.value
615
  _time = time_slider.value
616
 
617
- # scaled rate parameter
618
- _lambda = _rate * _time
619
-
620
- # PMF for values
621
- _max_x = max(30, int(_lambda * 1.5))
622
- _x = np.arange(0, _max_x + 1)
623
- _pmf = stats.poisson.pmf(_x, _lambda)
624
 
625
- # plot
626
- _fig, _ax = plt.subplots(figsize=(10, 6))
627
 
628
- # PMF as bars
629
- _ax.bar(_x, _pmf, color='royalblue', alpha=0.7,
630
- label=f'PMF: Poisson(λ={_lambda:.1f})')
631
-
632
- # vertical line for mean
633
- _ax.axvline(x=_lambda, color='red', linestyle='--', linewidth=2,
634
- label=f'Mean = {_lambda:.1f}')
635
-
636
- # labels and title
637
- _ax.set_xlabel('Number of Events')
638
- _ax.set_ylabel('Probability')
639
- _ax.set_title(f'Poisson Distribution Over {_time} Units (Rate = {_rate}/unit)')
640
-
641
- # better visualization if lambda is large
642
- if _lambda > 10:
643
- _ax.set_xlim(_lambda - 4*np.sqrt(_lambda), _lambda + 4*np.sqrt(_lambda))
644
-
645
- _ax.legend()
646
- _ax.grid(alpha=0.3)
647
-
648
- plt.tight_layout()
649
- plt.gca()
650
-
651
- # additional information
652
- info = mo.md(
653
- f"""
654
- When the rate is **{_rate}** events per unit time and we observe for **{_time}** units:
655
-
656
- - The expected number of events is **{_lambda:.1f}**
657
- - The variance is also **{_lambda:.1f}**
658
- - The standard deviation is **{np.sqrt(_lambda):.2f}**
659
- - P(X=0) = {stats.poisson.pmf(0, _lambda):.4f} (probability of no events)
660
- - P(X≥10) = {1 - stats.poisson.cdf(9, _lambda):.4f} (probability of 10 or more events)
661
- """
662
- )
663
- return (info,)
664
 
665
 
666
  @app.cell(hide_code=True)
 
13
 
14
  import marimo
15
 
16
+ __generated_with = "0.11.25"
17
  app = marimo.App(width="medium", app_title="Poisson Distribution")
18
 
19
 
 
93
 
94
  @app.cell(hide_code=True)
95
  def _(TangleSlider, mo):
96
+ # interactive elements using TangleSlider
97
  lambda_slider = mo.ui.anywidget(TangleSlider(
98
  amount=5,
99
  min_value=0.1,
 
118
 
119
  @app.cell(hide_code=True)
120
  def _(lambda_slider, np, plt, stats):
121
+ def create_poisson_pmf_plot(lambda_value):
122
+ """Create a visualization of Poisson PMF with annotations for mean and variance."""
123
+ # PMF for values
124
+ max_x = max(20, int(lambda_value * 3)) # Show at least up to 3*lambda
125
+ x = np.arange(0, max_x + 1)
126
+ pmf = stats.poisson.pmf(x, lambda_value)
127
 
128
+ # Relevant key statistics
129
+ mean = lambda_value # For Poisson, mean = lambda
130
+ variance = lambda_value # For Poisson, variance = lambda
131
+ std_dev = np.sqrt(variance)
132
 
133
+ # plot
134
+ fig, ax = plt.subplots(figsize=(10, 6))
 
 
135
 
136
+ # PMF as bars
137
+ ax.bar(x, pmf, color='royalblue', alpha=0.7, label=f'PMF: P(X=k)')
138
 
139
+ # Line-and-marker overlay of the PMF values
140
+ ax.plot(x, pmf, 'ro-', alpha=0.6, label='PMF line')
141
 
142
+ # Vertical lines - mean and key values
143
+ ax.axvline(x=mean, color='green', linestyle='--', linewidth=2,
144
+ label=f'Mean: {mean:.2f}')
145
 
146
+ # Stdev region
147
+ ax.axvspan(mean - std_dev, mean + std_dev, alpha=0.2, color='green',
148
+ label=f'±1 Std Dev: {std_dev:.2f}')
149
 
150
+ ax.set_xlabel('Number of Events (k)')
151
+ ax.set_ylabel('Probability: P(X=k)')
152
+ ax.set_title(f'Poisson Distribution with λ={lambda_value:.1f}')
153
 
154
+ # annotations
155
+ ax.annotate(f'E[X] = {mean:.2f}',
156
+ xy=(mean, stats.poisson.pmf(int(mean), lambda_value)),
157
+ xytext=(mean + 1, max(pmf) * 0.8),
158
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
159
 
160
+ ax.annotate(f'Var(X) = {variance:.2f}',
161
+ xy=(mean, stats.poisson.pmf(int(mean), lambda_value) / 2),
162
+ xytext=(mean + 1, max(pmf) * 0.6),
163
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
 
164
 
165
+ ax.grid(alpha=0.3)
166
+ ax.legend()
 
 
167
 
168
+ plt.tight_layout()
169
+ return plt.gca()
170
 
171
+ # Get parameter from slider and create plot
172
+ _lambda = lambda_slider.amount
173
+ create_poisson_pmf_plot(_lambda)
174
+ return (create_poisson_pmf_plot,)
175
 
176
 
177
  @app.cell(hide_code=True)
 
194
 
195
  @app.cell(hide_code=True)
196
  def _(fig_to_image, mo, plt):
197
+ def create_time_division_visualization():
198
+ # Visualization of dividing a minute into 60 seconds
199
+ fig, ax = plt.subplots(figsize=(12, 2))
200
+
201
+ # Example events hardcoded at 2.75s and 7.12s
202
+ events = [2.75, 7.12]
203
+
204
+ # array of 60 rectangles
205
+ for i in range(60):
206
+ color = 'royalblue' if any(i <= e < i+1 for e in events) else 'lightgray'
207
+ ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=color))
208
+
209
+ # markers for events
210
+ for e in events:
211
+ ax.plot(e, 0.5, 'ro', markersize=10)
212
+
213
+ # labels
214
+ ax.set_xlim(0, 60)
215
+ ax.set_ylim(0, 1)
216
+ ax.set_yticks([])
217
+ ax.set_xticks([0, 15, 30, 45, 60])
218
+ ax.set_xticklabels(['0s', '15s', '30s', '45s', '60s'])
219
+ ax.set_xlabel('Time (seconds)')
220
+ ax.set_title('One Minute Divided into 60 Second Intervals')
221
+
222
+ plt.tight_layout()
223
+ plt.gca()
224
+ return fig, events, i
225
+
226
+ # Create visualization and convert to image
227
+ _fig, _events, i = create_time_division_visualization()
228
  _img = mo.image(fig_to_image(_fig), width="100%")
229
 
230
  # explanation
231
  _explanation = mo.md(
232
  r"""
233
  In this visualization:
234
+
235
  - Each rectangle represents a 1-second interval
236
  - Blue rectangles indicate intervals where an event occurred
237
  - Red dots show the actual event times (2.75s and 7.12s)
 
239
  If we treat this as a binomial experiment with 60 trials (seconds), we can calculate probabilities using the binomial PMF. But there's a problem: what if multiple events occur within the same second? To address this, we can divide our minute into smaller intervals.
240
  """
241
  )
242
+ mo.vstack([_fig, _explanation])
243
+ return create_time_division_visualization, i
244
 
245
 
246
  @app.cell(hide_code=True)
 
276
 
277
 
278
  @app.cell(hide_code=True)
279
+ def _(fig_to_image, mo, plt):
280
+ def create_decisecond_visualization(e_value):
281
+ # Visualization of dividing a minute into 600 deciseconds (only the first 100 are shown for clarity)
282
+ fig, ax = plt.subplots(figsize=(12, 2))
283
+
284
+ # Example events at 2.75s and 7.12s (convert to deciseconds)
285
+ events = [27.5, 71.2]
286
+
287
+ for i in range(100):
288
+ color = 'royalblue' if any(i <= event_val < i + 1 for event_val in events) else 'lightgray'
289
+ ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=color))
290
+
291
+ # Markers for events
292
+ for event in events:
293
+ if event < 100: # Only show events in our visible range
294
+ ax.plot(event/10, 0.5, 'ro', markersize=10) # NOTE(review): events are already in deciseconds; dividing by 10 yields seconds, which does not match the 0-100 decisecond x-axis
295
+
296
+ # Add labels
297
+ ax.set_xlim(0, 100)
298
+ ax.set_ylim(0, 1)
299
+ ax.set_yticks([])
300
+ ax.set_xticks([0, 20, 40, 60, 80, 100])
301
+ ax.set_xticklabels(['0s', '2s', '4s', '6s', '8s', '10s'])
302
+ ax.set_xlabel('Time (first 10 seconds shown)')
303
+ ax.set_title('One Minute Divided into 600 Decisecond Intervals (first 100 shown)')
304
+
305
+ plt.tight_layout()
306
+ plt.gca()
307
+ return fig
308
+
309
+ # Create viz and convert to image
310
+ _fig = create_decisecond_visualization(e_value=5)
311
  _img = mo.image(fig_to_image(_fig), width="100%")
312
 
313
+ # Explanation
314
  _explanation = mo.md(
315
  r"""
316
  With $n=600$ and $p=\frac{5}{600}=\frac{1}{120}$, we can recalculate our probabilities:
 
324
  As we make our intervals smaller (increasing $n$), our approximation becomes more accurate.
325
  """
326
  )
327
+ mo.vstack([_fig, _explanation])
328
+ return (create_decisecond_visualization,)
329
 
330
 
331
  @app.cell(hide_code=True)
 
342
 
343
  @app.cell(hide_code=True)
344
  def _(mo):
 
345
  intervals_slider = mo.ui.slider(
346
  start = 60,
347
  stop = 10000,
 
359
 
360
  @app.cell(hide_code=True)
361
  def _(intervals_slider, np, pd, plt, stats):
362
+ def create_comparison_plot(n, lambda_value):
363
+ # Calculate probability
364
+ p = lambda_value / n
365
+
366
+ # Binomial probabilities
367
+ x_values = np.arange(0, 15)
368
+ binom_pmf = stats.binom.pmf(x_values, n, p)
369
+
370
+ # True Poisson probabilities
371
+ poisson_pmf = stats.poisson.pmf(x_values, lambda_value)
372
+
373
+ # DF for comparison
374
+ df = pd.DataFrame({
375
+ 'Events': x_values,
376
+ f'Binomial(n={n}, p={p:.6f})': binom_pmf,
377
+ f'Poisson(λ=5)': poisson_pmf,
378
+ 'Difference': np.abs(binom_pmf - poisson_pmf)
379
+ })
380
+
381
+ # Plot both PMFs
382
+ fig, ax = plt.subplots(figsize=(10, 6))
383
+
384
+ # Bar plot for the binomial
385
+ ax.bar(x_values - 0.2, binom_pmf, width=0.4, alpha=0.7,
386
+ color='royalblue', label=f'Binomial(n={n}, p={p:.6f})')
387
+
388
+ # Bar plot for the Poisson
389
+ ax.bar(x_values + 0.2, poisson_pmf, width=0.4, alpha=0.7,
390
+ color='crimson', label='Poisson(λ=5)')
391
+
392
+ # Labels and title
393
+ ax.set_xlabel('Number of Events (k)')
394
+ ax.set_ylabel('Probability')
395
+ ax.set_title(f'Comparison of Binomial and Poisson PMFs with n={n}')
396
+ ax.legend()
397
+ ax.set_xticks(x_values)
398
+ ax.grid(alpha=0.3)
399
+
400
+ plt.tight_layout()
401
+ return df, fig, n, p
402
+
403
+ # Number of intervals from the slider
404
  n = intervals_slider.value
405
  _lambda = 5 # Fixed lambda for our example
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
+ # Comparison plot
408
+ df, fig, n, p = create_comparison_plot(n, _lambda)
409
+ return create_comparison_plot, df, fig, n, p
410
 
411
 
412
  @app.cell(hide_code=True)
 
418
  'Difference': '{:.6f}'
419
  })
420
 
421
+ # Calculate the max absolute difference
422
  _max_diff = df['Difference'].max()
423
 
424
  # output
 
517
 
518
  @app.cell
519
  def _(stats):
 
520
  _lambda = 5
521
 
522
  # Calculate probabilities for X = 1, 2, 3
 
546
 
547
  @app.cell(hide_code=True)
548
  def _(np, plt, stats):
549
+ def create_samples_plot(lambda_value, sample_size=1000):
550
+ # Random samples
551
+ samples = stats.poisson.rvs(lambda_value, size=sample_size)
552
+
553
+ # theoretical PMF
554
+ x_values = np.arange(0, max(samples) + 1)
555
+ pmf_values = stats.poisson.pmf(x_values, lambda_value)
556
+
557
+ # histograms to compare
558
+ fig, ax = plt.subplots(figsize=(10, 6))
559
+
560
+ # samples as a histogram
561
+ ax.hist(samples, bins=np.arange(-0.5, max(samples) + 1.5, 1),
562
+ alpha=0.7, density=True, label='Random Samples')
563
+
564
+ # theoretical PMF
565
+ ax.plot(x_values, pmf_values, 'ro-', label='Theoretical PMF')
566
+
567
+ # labels and title
568
+ ax.set_xlabel('Number of Events')
569
+ ax.set_ylabel('Relative Frequency / Probability')
570
+ ax.set_title(f'1000 Random Samples from Poisson(λ={lambda_value})')
571
+ ax.legend()
572
+ ax.grid(alpha=0.3)
573
+
574
+ # annotations
575
+ ax.annotate(f'Sample Mean: {np.mean(samples):.2f}',
576
+ xy=(0.7, 0.9), xycoords='axes fraction',
577
+ bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
578
+ ax.annotate(f'Theoretical Mean: {lambda_value:.2f}',
579
+ xy=(0.7, 0.8), xycoords='axes fraction',
580
+ bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))
581
+
582
+ plt.tight_layout()
583
+ return plt.gca()
584
+
585
+ # Use a lambda value of 5 for this example
586
  _lambda = 5
587
+ create_samples_plot(_lambda)
588
+ return (create_samples_plot,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
589
 
590
 
591
  @app.cell(hide_code=True)
 
606
 
607
  @app.cell(hide_code=True)
608
  def _(mo):
 
609
  rate_slider = mo.ui.slider(
610
  start = 0.1,
611
  stop = 10,
 
629
  return controls, rate_slider, time_slider
630
 
631
 
632
+ @app.cell
633
+ def _(controls):
634
+ controls.center()
635
+ return
636
+
637
+
638
  @app.cell(hide_code=True)
639
  def _(mo, np, plt, rate_slider, stats, time_slider):
640
+ def create_time_scaling_plot(rate, time_period):
641
+ # scaled rate parameter
642
+ lambda_value = rate * time_period
643
+
644
+ # PMF for values
645
+ max_x = max(30, int(lambda_value * 1.5))
646
+ x = np.arange(0, max_x + 1)
647
+ pmf = stats.poisson.pmf(x, lambda_value)
648
+
649
+ # plot
650
+ fig, ax = plt.subplots(figsize=(10, 6))
651
+
652
+ # PMF as bars
653
+ ax.bar(x, pmf, color='royalblue', alpha=0.7,
654
+ label=f'PMF: Poisson(λ={lambda_value:.1f})')
655
+
656
+ # vertical line for mean
657
+ ax.axvline(x=lambda_value, color='red', linestyle='--', linewidth=2,
658
+ label=f'Mean = {lambda_value:.1f}')
659
+
660
+ # labels and title
661
+ ax.set_xlabel('Number of Events')
662
+ ax.set_ylabel('Probability')
663
+ ax.set_title(f'Poisson Distribution Over {time_period} Units (Rate = {rate}/unit)')
664
+
665
+ # better visualization if lambda is large
666
+ if lambda_value > 10:
667
+ ax.set_xlim(lambda_value - 4*np.sqrt(lambda_value),
668
+ lambda_value + 4*np.sqrt(lambda_value))
669
+
670
+ ax.legend()
671
+ ax.grid(alpha=0.3)
672
+
673
+ plt.tight_layout()
674
+
675
+ # Create relevant info markdown
676
+ info_text = f"""
677
+ When the rate is **{rate}** events per unit time and we observe for **{time_period}** units:
678
+
679
+ - The expected number of events is **{lambda_value:.1f}**
680
+ - The variance is also **{lambda_value:.1f}**
681
+ - The standard deviation is **{np.sqrt(lambda_value):.2f}**
682
+ - P(X=0) = {stats.poisson.pmf(0, lambda_value):.4f} (probability of no events)
683
+ - P(X≥10) = {1 - stats.poisson.cdf(9, lambda_value):.4f} (probability of 10 or more events)
684
+ """
685
+
686
+ return plt.gca(), info_text
687
+
688
  # parameters from sliders
689
  _rate = rate_slider.value
690
  _time = time_slider.value
691
 
692
+ # Build the plot and its accompanying info text
693
+ _plot, _info_text = create_time_scaling_plot(_rate, _time)
 
 
 
 
 
694
 
695
+ # Display info as markdown
696
+ info = mo.md(_info_text)
697
 
698
+ mo.vstack([_plot, info], justify="center")
699
+ return create_time_scaling_plot, info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
700
 
701
 
702
  @app.cell(hide_code=True)