Spaces:
Running
Running
minor edits
Browse files
probability/04_conditional_probability.py
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
|
11 |
import marimo
|
12 |
|
13 |
-
__generated_with = "0.11.
|
14 |
app = marimo.App(width="medium", app_title="Conditional Probability")
|
15 |
|
16 |
|
@@ -20,20 +20,14 @@ def _():
|
|
20 |
return (mo,)
|
21 |
|
22 |
|
23 |
-
@app.cell
|
24 |
-
def _():
|
25 |
-
import matplotlib.pyplot as plt
|
26 |
-
from matplotlib_venn import venn3
|
27 |
-
import numpy as np
|
28 |
-
return np, plt, venn3
|
29 |
-
|
30 |
-
|
31 |
@app.cell(hide_code=True)
|
32 |
def _(mo):
|
33 |
mo.md(
|
34 |
r"""
|
35 |
# Conditional Probability
|
36 |
|
|
|
|
|
37 |
In probability theory, we often want to update our beliefs when we receive new information.
|
38 |
Conditional probability helps us formalize this process by calculating "_what is the chance of
|
39 |
event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)
|
@@ -54,26 +48,34 @@ def _(mo):
|
|
54 |
r"""
|
55 |
## Definition of Conditional Probability
|
56 |
|
57 |
-
The probability of event $E$ given that event $F$ has occurred is denoted as $P(E \mid F)$ and is defined as:
|
58 |
|
59 |
-
$$P(E \mid F) = \frac{P(E \cap F)}{P(F)}$$
|
60 |
|
61 |
This formula tells us that the conditional probability is the probability of both events occurring
|
62 |
divided by the probability of the conditioning event.
|
63 |
|
64 |
-
Let's start with a visual example.
|
65 |
"""
|
66 |
)
|
67 |
return
|
68 |
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
@app.cell(hide_code=True)
|
71 |
def _(mo, plt, venn3):
|
72 |
# Create figure with square boundaries
|
73 |
-
plt.figure(figsize=(10,
|
74 |
|
75 |
# Draw square sample space first
|
76 |
-
rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color=
|
77 |
plt.gca().add_patch(rect)
|
78 |
|
79 |
# Set the axis limits to show the full rectangle
|
@@ -83,35 +85,38 @@ def _(mo, plt, venn3):
|
|
83 |
# Create Venn diagram showing E and F
|
84 |
# For venn3, subsets order is: (100, 010, 110, 001, 101, 011, 111)
|
85 |
# Representing: (A, B, AB, C, AC, BC, ABC)
|
86 |
-
v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0),
|
87 |
-
set_labels=('E', 'F', 'Rest'))
|
88 |
|
89 |
# Customize colors
|
90 |
if v:
|
91 |
-
for id in [
|
92 |
if v.get_patch_by_id(id):
|
93 |
-
if id ==
|
94 |
-
v.get_patch_by_id(id).set_color(
|
95 |
-
elif id ==
|
96 |
-
v.get_patch_by_id(id).set_color(
|
97 |
-
elif id ==
|
98 |
-
v.get_patch_by_id(id).set_color(
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
103 |
|
104 |
# Remove ticks but keep the box visible
|
105 |
plt.gca().set_yticks([])
|
106 |
plt.gca().set_xticks([])
|
107 |
-
plt.axis(
|
108 |
|
109 |
# Add sample space annotation with arrow
|
110 |
-
plt.annotate(
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
|
|
115 |
|
116 |
# Add explanation
|
117 |
explanation = mo.md(r"""
|
@@ -125,17 +130,25 @@ def _(mo, plt, venn3):
|
|
125 |
- Remaining cases: 40 (to complete sample space of 100)
|
126 |
|
127 |
When we condition on $F$:
|
128 |
-
$$P(E \mid F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$
|
129 |
|
130 |
This means: When we know $F$ has occurred (restricting ourselves to the green region),
|
131 |
the probability of $E$ also occurring is $\frac{1}{3}$ - as 10 out of the 30 cases in the
|
132 |
green region also belong to the red region.
|
133 |
""")
|
134 |
|
135 |
-
mo.
|
136 |
return explanation, id, rect, v
|
137 |
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
@app.cell
|
140 |
def _():
|
141 |
def conditional_probability(p_intersection, p_condition):
|
@@ -153,7 +166,7 @@ def _(conditional_probability):
|
|
153 |
# Example 1: Rolling a die
|
154 |
# E: Rolling an even number (2,4,6)
|
155 |
# F: Rolling a number greater than 3 (4,5,6)
|
156 |
-
p_even_given_greater_than_3 = conditional_probability(2/6, 3/6)
|
157 |
print("Example 1: Rolling a die")
|
158 |
print(f"P(Even | >3) = {p_even_given_greater_than_3}") # Should be 2/3
|
159 |
return (p_even_given_greater_than_3,)
|
@@ -164,7 +177,7 @@ def _(conditional_probability):
|
|
164 |
# Example 2: Cards
|
165 |
# E: Drawing a Heart
|
166 |
# F: Drawing a Face card (J,Q,K)
|
167 |
-
p_heart_given_face = conditional_probability(3/52, 12/52)
|
168 |
print("\nExample 2: Drawing cards")
|
169 |
print(f"P(Heart | Face card) = {p_heart_given_face}") # Should be 1/4
|
170 |
return (p_heart_given_face,)
|
@@ -226,10 +239,10 @@ def _(mo):
|
|
226 |
|
227 |
| Rule | Original | Conditioned on $G$ |
|
228 |
|------|----------|-------------------|
|
229 |
-
| Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E \mid G) \leq 1$ |
|
230 |
-
| Axiom 2 | $P(S) = 1$ | $P(S \mid G) = 1$ |
|
231 |
-
| Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F \mid G) = P(E \mid G) + P(F \mid G)$ |
|
232 |
-
| Complement | $P(E^C) = 1 - P(E)$ | $P(E^C \mid G) = 1 - P(E \mid G)$ |
|
233 |
|
234 |
*_For mutually exclusive events_
|
235 |
"""
|
@@ -243,12 +256,12 @@ def _(mo):
|
|
243 |
r"""
|
244 |
## Multiple Conditions
|
245 |
|
246 |
-
We can condition on multiple events. The notation $P(E \mid F,G)$ means "_the probability of $E$
|
247 |
occurring, given that both $F$ and $G$ have occurred._"
|
248 |
|
249 |
The conditional probability formula still holds in the universe where $G$ has occurred:
|
250 |
|
251 |
-
$$P(E \mid F,G) = \frac{P(E \cap F \mid G)}{P(F \mid G)}$$
|
252 |
|
253 |
This is a powerful extension that allows us to update our probabilities as we receive
|
254 |
multiple pieces of information.
|
@@ -259,12 +272,16 @@ def _(mo):
|
|
259 |
|
260 |
@app.cell
|
261 |
def _():
|
262 |
-
def multiple_conditional_probability(
|
|
|
|
|
263 |
"""Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G)"""
|
264 |
if p_condition == 0:
|
265 |
raise ValueError("Cannot condition on an impossible event")
|
266 |
if p_intersection_conditions == 0:
|
267 |
-
raise ValueError(
|
|
|
|
|
268 |
if p_intersection_all > p_intersection_conditions:
|
269 |
raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")
|
270 |
|
@@ -284,7 +301,9 @@ def _(multiple_conditional_probability):
|
|
284 |
|
285 |
p_admit_given_both = multiple_conditional_probability(0.15, 0.25, 0.25)
|
286 |
print("College Admissions Example:")
|
287 |
-
print(
|
|
|
|
|
288 |
|
289 |
# Error case: impossible condition
|
290 |
try:
|
@@ -335,7 +354,7 @@ def _(mo):
|
|
335 |
You've learned:
|
336 |
|
337 |
- How conditional probability updates our beliefs with new information
|
338 |
-
- The formula $P(E \mid F) = P(E \cap F)/P(F)$ and its intuition
|
339 |
- How probability rules work in conditional universes
|
340 |
- How to handle multiple conditions
|
341 |
|
|
|
10 |
|
11 |
import marimo
|
12 |
|
13 |
+
__generated_with = "0.11.4"
|
14 |
app = marimo.App(width="medium", app_title="Conditional Probability")
|
15 |
|
16 |
|
|
|
20 |
return (mo,)
|
21 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
@app.cell(hide_code=True)
|
24 |
def _(mo):
|
25 |
mo.md(
|
26 |
r"""
|
27 |
# Conditional Probability
|
28 |
|
29 |
+
_This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/), by Stanford professor Chris Piech._
|
30 |
+
|
31 |
In probability theory, we often want to update our beliefs when we receive new information.
|
32 |
Conditional probability helps us formalize this process by calculating "_what is the chance of
|
33 |
event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)
|
|
|
48 |
r"""
|
49 |
## Definition of Conditional Probability
|
50 |
|
51 |
+
The probability of event $E$ given that event $F$ has occurred is denoted as $P(E \mid F)$ and is defined as:
|
52 |
|
53 |
+
$$P(E \mid F) = \frac{P(E \cap F)}{P(F)}$$
|
54 |
|
55 |
This formula tells us that the conditional probability is the probability of both events occurring
|
56 |
divided by the probability of the conditioning event.
|
57 |
|
58 |
+
Let's start with a visual example.
|
59 |
"""
|
60 |
)
|
61 |
return
|
62 |
|
63 |
|
64 |
+
@app.cell
|
65 |
+
def _():
|
66 |
+
import matplotlib.pyplot as plt
|
67 |
+
from matplotlib_venn import venn3
|
68 |
+
import numpy as np
|
69 |
+
return np, plt, venn3
|
70 |
+
|
71 |
+
|
72 |
@app.cell(hide_code=True)
|
73 |
def _(mo, plt, venn3):
|
74 |
# Create figure with square boundaries
|
75 |
+
plt.figure(figsize=(10, 3))
|
76 |
|
77 |
# Draw square sample space first
|
78 |
+
rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color="gray", linestyle="--")
|
79 |
plt.gca().add_patch(rect)
|
80 |
|
81 |
# Set the axis limits to show the full rectangle
|
|
|
85 |
# Create Venn diagram showing E and F
|
86 |
# For venn3, subsets order is: (100, 010, 110, 001, 101, 011, 111)
|
87 |
# Representing: (A, B, AB, C, AC, BC, ABC)
|
88 |
+
v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0), set_labels=("E", "F", "Rest"))
|
|
|
89 |
|
90 |
# Customize colors
|
91 |
if v:
|
92 |
+
for id in ["100", "010", "110", "001"]:
|
93 |
if v.get_patch_by_id(id):
|
94 |
+
if id == "100":
|
95 |
+
v.get_patch_by_id(id).set_color("#ffcccc") # Light red for E
|
96 |
+
elif id == "010":
|
97 |
+
v.get_patch_by_id(id).set_color("#ccffcc") # Light green for F
|
98 |
+
elif id == "110":
|
99 |
+
v.get_patch_by_id(id).set_color(
|
100 |
+
"#e6ffe6"
|
101 |
+
) # Lighter green for intersection
|
102 |
+
elif id == "001":
|
103 |
+
v.get_patch_by_id(id).set_color("white") # White for rest
|
104 |
+
|
105 |
+
plt.title("Conditional Probability in Sample Space")
|
106 |
|
107 |
# Remove ticks but keep the box visible
|
108 |
plt.gca().set_yticks([])
|
109 |
plt.gca().set_xticks([])
|
110 |
+
plt.axis("on")
|
111 |
|
112 |
# Add sample space annotation with arrow
|
113 |
+
plt.annotate(
|
114 |
+
"Sample Space (100)",
|
115 |
+
xy=(-1.5, 1.5),
|
116 |
+
xytext=(-2.2, 2),
|
117 |
+
bbox=dict(boxstyle="round,pad=0.5", fc="white", ec="gray"),
|
118 |
+
arrowprops=dict(arrowstyle="->"),
|
119 |
+
)
|
120 |
|
121 |
# Add explanation
|
122 |
explanation = mo.md(r"""
|
|
|
130 |
- Remaining cases: 40 (to complete sample space of 100)
|
131 |
|
132 |
When we condition on $F$:
|
133 |
+
$$P(E \mid F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$
|
134 |
|
135 |
This means: When we know $F$ has occurred (restricting ourselves to the green region),
|
136 |
the probability of $E$ also occurring is $\frac{1}{3}$ - as 10 out of the 30 cases in the
|
137 |
green region also belong to the red region.
|
138 |
""")
|
139 |
|
140 |
+
mo.vstack([mo.center(plt.gcf()), explanation])
|
141 |
return explanation, id, rect, v
|
142 |
|
143 |
|
144 |
+
@app.cell(hide_code=True)
|
145 |
+
def _(mo):
|
146 |
+
mo.md(
|
147 |
+
r"Next, here's a function that computes $P(E \mid F)$, given $P( E \cap F)$ and $P(F)$"
|
148 |
+
)
|
149 |
+
return
|
150 |
+
|
151 |
+
|
152 |
@app.cell
|
153 |
def _():
|
154 |
def conditional_probability(p_intersection, p_condition):
|
|
|
166 |
# Example 1: Rolling a die
|
167 |
# E: Rolling an even number (2,4,6)
|
168 |
# F: Rolling a number greater than 3 (4,5,6)
|
169 |
+
p_even_given_greater_than_3 = conditional_probability(2 / 6, 3 / 6)
|
170 |
print("Example 1: Rolling a die")
|
171 |
print(f"P(Even | >3) = {p_even_given_greater_than_3}") # Should be 2/3
|
172 |
return (p_even_given_greater_than_3,)
|
|
|
177 |
# Example 2: Cards
|
178 |
# E: Drawing a Heart
|
179 |
# F: Drawing a Face card (J,Q,K)
|
180 |
+
p_heart_given_face = conditional_probability(3 / 52, 12 / 52)
|
181 |
print("\nExample 2: Drawing cards")
|
182 |
print(f"P(Heart | Face card) = {p_heart_given_face}") # Should be 1/4
|
183 |
return (p_heart_given_face,)
|
|
|
239 |
|
240 |
| Rule | Original | Conditioned on $G$ |
|
241 |
|------|----------|-------------------|
|
242 |
+
| Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E \mid G) \leq 1$ |
|
243 |
+
| Axiom 2 | $P(S) = 1$ | $P(S \mid G) = 1$ |
|
244 |
+
| Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F \mid G) = P(E \mid G) + P(F \mid G)$ |
|
245 |
+
| Complement | $P(E^C) = 1 - P(E)$ | $P(E^C \mid G) = 1 - P(E \mid G)$ |
|
246 |
|
247 |
*_For mutually exclusive events_
|
248 |
"""
|
|
|
256 |
r"""
|
257 |
## Multiple Conditions
|
258 |
|
259 |
+
We can condition on multiple events. The notation $P(E \mid F,G)$ means "_the probability of $E$
|
260 |
occurring, given that both $F$ and $G$ have occurred._"
|
261 |
|
262 |
The conditional probability formula still holds in the universe where $G$ has occurred:
|
263 |
|
264 |
+
$$P(E \mid F,G) = \frac{P(E \cap F \mid G)}{P(F \mid G)}$$
|
265 |
|
266 |
This is a powerful extension that allows us to update our probabilities as we receive
|
267 |
multiple pieces of information.
|
|
|
272 |
|
273 |
@app.cell
|
274 |
def _():
|
275 |
+
def multiple_conditional_probability(
|
276 |
+
p_intersection_all, p_intersection_conditions, p_condition
|
277 |
+
):
|
278 |
"""Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G)"""
|
279 |
if p_condition == 0:
|
280 |
raise ValueError("Cannot condition on an impossible event")
|
281 |
if p_intersection_conditions == 0:
|
282 |
+
raise ValueError(
|
283 |
+
"Cannot condition on an impossible combination of events"
|
284 |
+
)
|
285 |
if p_intersection_all > p_intersection_conditions:
|
286 |
raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")
|
287 |
|
|
|
301 |
|
302 |
p_admit_given_both = multiple_conditional_probability(0.15, 0.25, 0.25)
|
303 |
print("College Admissions Example:")
|
304 |
+
print(
|
305 |
+
f"P(Admitted | High GPA, Good Scores) = {p_admit_given_both}"
|
306 |
+
) # Should be 0.6
|
307 |
|
308 |
# Error case: impossible condition
|
309 |
try:
|
|
|
354 |
You've learned:
|
355 |
|
356 |
- How conditional probability updates our beliefs with new information
|
357 |
+
- The formula $P(E \mid F) = P(E \cap F)/P(F)$ and its intuition
|
358 |
- How probability rules work in conditional universes
|
359 |
- How to handle multiple conditions
|
360 |
|