Add `conditional probability` notebook
probability/04_conditional_probability.py
ADDED
@@ -0,0 +1,350 @@
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "marimo",
#     "matplotlib==3.10.0",
#     "matplotlib-venn==1.1.1",
#     "numpy==2.2.2",
# ]
# ///

import marimo

__generated_with = "0.11.2"
app = marimo.App(width="medium", app_title="Conditional Probability")


@app.cell
def _():
    import marimo as mo
    return (mo,)


@app.cell
def _():
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn3
    import numpy as np
    return np, plt, venn3


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        # Conditional Probability

        In probability theory, we often want to update our beliefs when we receive new information.
        Conditional probability formalizes this process by answering the question "_what is the chance of
        event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)

        When we condition on an event $F$:

        - We enter the universe where $F$ has occurred
        - Only outcomes consistent with $F$ are possible
        - Our sample space reduces to $F$
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Definition of Conditional Probability

        The probability of event $E$ given that event $F$ has occurred is denoted $P(E|F)$ and is defined as:

        $$P(E|F) = \frac{P(E \cap F)}{P(F)}$$

        This formula tells us that the conditional probability is the probability of both events occurring
        divided by the probability of the conditioning event.

        Let's build intuition with a picture first, and then with a small function that computes conditional probability.
        """
    )
    return


@app.cell(hide_code=True)
def _(mo, plt, venn3):
    # Create figure with square boundaries
    plt.figure(figsize=(10, 5))

    # Draw the square sample space first
    rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color='gray', linestyle='--')
    plt.gca().add_patch(rect)

    # Set the axis limits to show the full rectangle
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)

    # Create Venn diagram showing E and F
    # For venn3, the subsets order is: (100, 010, 110, 001, 101, 011, 111),
    # representing: (A, B, AB, C, AC, BC, ABC)
    v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0),
              set_labels=('E', 'F', 'Rest'))

    # Customize region colors
    if v:
        for region_id in ['100', '010', '110', '001']:
            if v.get_patch_by_id(region_id):
                if region_id == '100':
                    v.get_patch_by_id(region_id).set_color('#ffcccc')  # Light red for E
                elif region_id == '010':
                    v.get_patch_by_id(region_id).set_color('#ccffcc')  # Light green for F
                elif region_id == '110':
                    v.get_patch_by_id(region_id).set_color('#e6ffe6')  # Lighter green for the intersection
                elif region_id == '001':
                    v.get_patch_by_id(region_id).set_color('white')  # White for the rest

    plt.title('Conditional Probability in Sample Space')

    # Remove ticks but keep the box visible
    plt.gca().set_yticks([])
    plt.gca().set_xticks([])
    plt.axis('on')

    # Add sample space annotation with arrow
    plt.annotate('Sample Space (100)',
                 xy=(-1.5, 1.5),
                 xytext=(-2.2, 2),
                 bbox=dict(boxstyle='round,pad=0.5', fc='white', ec='gray'),
                 arrowprops=dict(arrowstyle='->'))

    # Add explanation
    explanation = mo.md(r"""
    ### Visual Intuition

    In our sample space of 100 outcomes:

    - Event $E$ occurs in 40 cases (red region plus overlap: 30 + 10)
    - Event $F$ occurs in 30 cases (green region plus overlap: 20 + 10)
    - Both events occur together in 10 cases (the overlap)
    - The remaining 40 cases complete the sample space of 100

    When we condition on $F$:

    $$P(E|F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$

    This means that when we know $F$ has occurred (restricting ourselves to the green region),
    the probability that $E$ also occurs is $\frac{1}{3}$, since 10 of the 30 cases in the
    green region also belong to the red region.
    """)

    mo.hstack([plt.gcf(), explanation])
    return explanation, rect, region_id, v

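
@app.cell
def _(np):
    # Sanity check of the Venn-diagram numbers above: label the 100 equally
    # likely outcomes 0..99 so that E covers 40 of them, F covers 30, and the
    # overlap is 10, then count directly. (Illustrative sketch; the specific
    # labeling is an assumption, only the region sizes come from the diagram.)
    _outcomes = np.arange(100)
    _in_e = _outcomes < 40                        # outcomes 0-39  -> 40 cases
    _in_f = (_outcomes >= 30) & (_outcomes < 60)  # outcomes 30-59 -> 30 cases, overlap 30-39
    _p_e_given_f = (_in_e & _in_f).sum() / _in_f.sum()
    print(f"Counted P(E|F) = {_p_e_given_f:.3f}")  # matches 10/30 = 1/3
    return
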
@app.cell
def _():
    def conditional_probability(p_intersection, p_condition):
        """Return P(E|F) = P(E∩F)/P(F)."""
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection > p_condition:
            raise ValueError("P(E∩F) cannot be greater than P(F)")

        return p_intersection / p_condition
    return (conditional_probability,)


@app.cell
def _(conditional_probability):
    # Example 1: Rolling a die
    # E: Rolling an even number (2,4,6)
    # F: Rolling a number greater than 3 (4,5,6)
    # E∩F = {4,6}, so P(E∩F) = 2/6 and P(F) = 3/6
    p_even_given_greater_than_3 = conditional_probability(2/6, 3/6)
    print("Example 1: Rolling a die")
    print(f"P(Even | >3) = {p_even_given_greater_than_3}")  # Should be 2/3
    return (p_even_given_greater_than_3,)

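
@app.cell
def _():
    # Cross-check of Example 1 by enumerating the six equally likely outcomes
    # directly (illustrative sketch; the outcome sets mirror the comments above).
    _outcomes = range(1, 7)
    _evens = {o for o in _outcomes if o % 2 == 0}       # E: {2, 4, 6}
    _greater_than_3 = {o for o in _outcomes if o > 3}   # F: {4, 5, 6}
    _p = len(_evens & _greater_than_3) / len(_greater_than_3)  # |E∩F| / |F|
    print(f"Enumerated P(Even | >3) = {_p}")  # 2/3, matching Example 1
    return
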
@app.cell
def _(conditional_probability):
    # Example 2: Cards
    # E: Drawing a Heart
    # F: Drawing a Face card (J,Q,K)
    # E∩F = the three heart face cards, so P(E∩F) = 3/52 and P(F) = 12/52
    p_heart_given_face = conditional_probability(3/52, 12/52)
    print("\nExample 2: Drawing cards")
    print(f"P(Heart | Face card) = {p_heart_given_face}")  # Should be 1/4
    return (p_heart_given_face,)


@app.cell
def _(conditional_probability):
    # Example 3: Student grades
    # E: Getting an A
    # F: Studying more than 3 hours
    p_a_given_study = conditional_probability(0.24, 0.40)
    print("\nExample 3: Student grades")
    print(f"P(A | Studied >3hrs) = {p_a_given_study}")  # Should be 0.6
    return (p_a_given_study,)


@app.cell
def _(conditional_probability):
    # Example 4: Weather
    # E: Raining
    # F: Cloudy
    p_rain_given_cloudy = conditional_probability(0.15, 0.30)
    print("\nExample 4: Weather")
    print(f"P(Rain | Cloudy) = {p_rain_given_cloudy}")  # Should be 0.5
    return (p_rain_given_cloudy,)


@app.cell
def _(conditional_probability):
    # Example 5: Error cases
    print("\nExample 5: Error cases")
    try:
        # Cannot condition on an impossible event
        conditional_probability(0.5, 0)
    except ValueError as e:
        print(f"Error 1: {e}")

    try:
        # The intersection cannot be larger than the conditioning event
        conditional_probability(0.7, 0.5)
    except ValueError as e:
        print(f"Error 2: {e}")
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## The Conditional Paradigm

        When we condition on an event, we enter a new probability universe. In this universe:

        1. All probability axioms still hold
        2. We must consistently condition on the same event
        3. Our sample space becomes the conditioning event

        Here's how our familiar probability rules look when conditioned on event $G$:

        | Rule | Original | Conditioned on $G$ |
        |------|----------|-------------------|
        | Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E\|G) \leq 1$ |
        | Axiom 2 | $P(S) = 1$ | $P(S\|G) = 1$ |
        | Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F\|G) = P(E\|G) + P(F\|G)$ |
        | Complement | $P(E^C) = 1 - P(E)$ | $P(E^C\|G) = 1 - P(E\|G)$ |

        *_For mutually exclusive events $E$ and $F$_
        """
    )
    return

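
@app.cell
def _():
    # Quick check that the complement rule from the table holds in the
    # 100-outcome model used for the Venn diagram, where |F| = 30 and |E∩F| = 10.
    _n_f = 30
    _n_e_and_f = 10
    _p_e_given_f = _n_e_and_f / _n_f                 # 1/3
    _p_not_e_given_f = (_n_f - _n_e_and_f) / _n_f    # cases in F but outside E
    print(f"P(E|F) = {_p_e_given_f:.3f}")
    print(f"P(E^C|F) = {_p_not_e_given_f:.3f} = 1 - P(E|F) = {1 - _p_e_given_f:.3f}")
    return
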
@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Multiple Conditions

        We can condition on multiple events. The notation $P(E|F,G)$ means "_the probability of $E$
        occurring, given that both $F$ and $G$ have occurred._"

        The conditional probability formula still holds in the universe where $G$ has occurred:

        $$P(E|F,G) = \frac{P(E \cap F|G)}{P(F|G)}$$

        This is a powerful extension that allows us to update our probabilities as we receive
        multiple pieces of information.
        """
    )
    return

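
@app.cell
def _():
    # Worked check (with assumed probabilities) that the two equivalent forms of
    # P(E|F,G) agree, because P(G) cancels out:
    #   P(E∩F|G) / P(F|G) = [P(E∩F∩G)/P(G)] / [P(F∩G)/P(G)] = P(E∩F∩G) / P(F∩G)
    _p_g = 0.50    # assumed P(G); any non-zero value gives the same final ratio
    _p_fg = 0.25   # P(F∩G), as in the admissions example below
    _p_efg = 0.15  # P(E∩F∩G), as in the admissions example below
    _via_conditionals = (_p_efg / _p_g) / (_p_fg / _p_g)
    _via_joint = _p_efg / _p_fg
    print(f"P(E|F,G) via conditionals on G: {_via_conditionals:.2f}")
    print(f"P(E|F,G) via joint probabilities: {_via_joint:.2f}")
    return
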
@app.cell
def _():
    def multiple_conditional_probability(p_intersection_all, p_intersection_conditions, p_condition):
        """Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G).

        p_condition is P(G); it is only validated here, since it cancels out of the ratio.
        """
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection_conditions == 0:
            raise ValueError("Cannot condition on an impossible combination of events")
        if p_intersection_all > p_intersection_conditions:
            raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")

        return p_intersection_all / p_intersection_conditions
    return (multiple_conditional_probability,)


@app.cell
def _(multiple_conditional_probability):
    # Example: College admissions
    # E: Getting admitted
    # F: High GPA
    # G: Good test scores

    # P(E∩F∩G) = P(Admitted ∩ HighGPA ∩ GoodScore) = 0.15
    # P(F∩G) = P(HighGPA ∩ GoodScore) = 0.25
    # P(G) is not given separately, so P(F∩G) is passed as the third argument;
    # it is only used for the non-zero check.
    p_admit_given_both = multiple_conditional_probability(0.15, 0.25, 0.25)
    print("College Admissions Example:")
    print(f"P(Admitted | High GPA, Good Scores) = {p_admit_given_both}")  # Should be 0.6

    # Error case: the triple intersection cannot exceed P(F∩G)
    try:
        multiple_conditional_probability(0.3, 0.2, 0.2)
    except ValueError as e:
        print(f"\nError case: {e}")
    return (p_admit_given_both,)


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## 🤔 Test Your Understanding

        Which of these statements about conditional probability are true?

        <details>
        <summary>Knowing F occurred always decreases the probability of E</summary>
        ❌ False! Conditioning on F can either increase or decrease P(E), depending on how E and F are related.
        </details>

        <details>
        <summary>P(E|F) represents entering a new probability universe where F has occurred</summary>
        ✅ True! We restrict ourselves to only the outcomes where F occurred, making F our new sample space.
        </details>

        <details>
        <summary>If P(E|F) = P(E), then E and F must be the same event</summary>
        ❌ False! This actually means E and F are independent - knowing one doesn't affect the other.
        </details>

        <details>
        <summary>P(E|F) can be calculated by dividing P(E∩F) by P(F)</summary>
        ✅ True! This is the fundamental definition of conditional probability.
        </details>
        """
    )
    return

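
@app.cell
def _():
    # Small illustration of the third quiz statement, using a fair die as an
    # assumed example: E = "even" and F = "at most 4" satisfy P(E|F) = P(E)
    # even though E ≠ F, i.e. the events are independent.
    _evens = {2, 4, 6}
    _at_most_4 = {1, 2, 3, 4}
    _p_e = len(_evens) / 6                                      # 1/2
    _p_e_given_f = len(_evens & _at_most_4) / len(_at_most_4)   # 2/4 = 1/2
    print(f"P(E) = {_p_e}, P(E|F) = {_p_e_given_f}")
    return
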
@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
        ## Summary

        You've learned:

        - How conditional probability updates our beliefs with new information
        - The formula $P(E|F) = P(E \cap F)/P(F)$ and its intuition
        - How probability rules work in conditional universes
        - How to handle multiple conditions

        In the next lesson, we'll explore **independence** - when knowing about one event
        tells us nothing about another.
        """
    )
    return


if __name__ == "__main__":
    app.run()