# NOTE: non-source web-capture artifacts removed here (Hugging Face Spaces
# status banner, file-size line, commit hashes, and line-number gutter).
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "marimo",
# "matplotlib==3.10.0",
# "matplotlib-venn==1.1.1",
# "numpy==2.2.2",
# ]
# ///
import marimo

# Version of marimo that generated this notebook file.
__generated_with = "0.11.4"

# Notebook application object; each @app.cell decorator below registers a
# cell with it, and marimo wires cells together via their parameters/returns.
app = marimo.App(width="medium", app_title="Conditional Probability")
@app.cell
def _():
    # Import marimo inside a cell so `mo` is exported through the notebook's
    # dependency graph to every other cell that declares it as a parameter.
    import marimo as mo
    return (mo,)
@app.cell(hide_code=True)
def _(mo):
    # Intro cell: motivates conditional probability as belief updating and
    # links to the companion book chapter.
    mo.md(
        r"""
        # Conditional Probability
        _This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/), by Stanford professor Chris Piech._
        In probability theory, we often want to update our beliefs when we receive new information.
        Conditional probability helps us formalize this process by calculating "_what is the chance of
        event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)
        When we condition on an event $F$:
        - We enter the universe where $F$ has occurred
        - Only outcomes consistent with $F$ are possible
        - Our sample space reduces to $F$
        """
    )
    return
@app.cell(hide_code=True)
def _(mo):
    # Definition cell: states the formula P(E|F) = P(E∩F)/P(F).
    mo.md(
        r"""
        ## Definition of Conditional Probability
        The probability of event $E$ given that event $F$ has occurred is denoted as $P(E \mid F)$ and is defined as:
        $$P(E \mid F) = \frac{P(E \cap F)}{P(F)}$$
        This formula tells us that the conditional probability is the probability of both events occurring
        divided by the probability of the conditioning event.
        Let's start with a visual example.
        """
    )
    return
@app.cell
def _():
    # Plotting dependencies: matplotlib for figures, matplotlib-venn for the
    # Venn diagram. numpy is imported and exported as `np` but is not
    # referenced by any of the cells visible in this file.
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn3
    import numpy as np
    return np, plt, venn3
@app.cell(hide_code=True)
def _(mo, plt, venn3):
    # Visualize P(E|F): a Venn diagram of events E and F drawn inside a
    # dashed square representing a sample space of 100 outcomes, followed
    # by a markdown explanation of the computation.
    plt.figure(figsize=(10, 3))

    # Dashed square marks the full sample space around the Venn circles.
    rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color="gray", linestyle="--")
    plt.gca().add_patch(rect)

    # Widen the axis limits so the whole rectangle stays visible.
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)

    # venn3 subset order: (100, 010, 110, 001, 101, 011, 111),
    # i.e. (A only, B only, A∩B, C only, A∩C, B∩C, A∩B∩C).
    # So: E only = 30, F only = 20, E∩F = 10, Rest = 40.
    v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0), set_labels=("E", "F", "Rest"))

    # Map each region id to its fill color. (The earlier version looped over
    # a list and re-dispatched with an if/elif chain, shadowing the builtin
    # `id` along the way.)
    region_colors = {
        "100": "#ffcccc",  # light red: E only
        "010": "#ccffcc",  # light green: F only
        "110": "#e6ffe6",  # lighter green: intersection E ∩ F
        "001": "white",    # rest of the sample space
    }
    if v:
        for region, color in region_colors.items():
            patch = v.get_patch_by_id(region)
            if patch:
                patch.set_color(color)

    plt.title("Conditional Probability in Sample Space")

    # Hide tick marks but keep the bounding box visible.
    plt.gca().set_yticks([])
    plt.gca().set_xticks([])
    plt.axis("on")

    # Callout arrow pointing at the sample-space rectangle.
    plt.annotate(
        "Sample Space (100)",
        xy=(-1.5, 1.5),
        xytext=(-2.2, 2),
        bbox=dict(boxstyle="round,pad=0.5", fc="white", ec="gray"),
        arrowprops=dict(arrowstyle="->"),
    )

    explanation = mo.md(r"""
    ### Visual Intuition
    In our sample space of 100 outcomes:
    - Event $E$ occurs in 40 cases (red region: 30 + 10)
    - Event $F$ occurs in 30 cases (green region: 20 + 10)
    - Both events occur together in 10 cases (overlap)
    - Remaining cases: 40 (to complete sample space of 100)
    When we condition on $F$:
    $$P(E \mid F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$
    This means: When we know $F$ has occurred (restricting ourselves to the green region),
    the probability of $E$ also occurring is $\frac{1}{3}$ - as 10 out of the 30 cases in the
    green region also belong to the red region.
    """)
    mo.vstack([mo.center(plt.gcf()), explanation])
    return explanation, rect, region_colors, v
@app.cell(hide_code=True)
def _(mo):
    # Transition text introducing the conditional_probability helper defined
    # in the next cell.
    mo.md(
        r"Next, here's a function that computes $P(E \mid F)$, given $P( E \cap F)$ and $P(F)$"
    )
    return
@app.cell
def _():
    def conditional_probability(p_intersection, p_condition):
        """Return the conditional probability P(E|F) = P(E∩F) / P(F).

        Args:
            p_intersection: P(E∩F), probability that both events occur.
            p_condition: P(F), probability of the conditioning event.

        Returns:
            P(E|F) as a float.

        Raises:
            ValueError: if either argument is outside [0, 1], if P(F) is
                zero (conditioning on an impossible event), or if
                P(E∩F) exceeds P(F).
        """
        # New guard: reject values that are not valid probabilities at all;
        # previously e.g. negative inputs silently produced nonsense.
        if not 0 <= p_intersection <= 1 or not 0 <= p_condition <= 1:
            raise ValueError("Probabilities must be between 0 and 1")
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection > p_condition:
            raise ValueError("P(E∩F) cannot be greater than P(F)")
        return p_intersection / p_condition

    return (conditional_probability,)
@app.cell
def _(conditional_probability):
    # Die roll: E = even face {2, 4, 6}, F = face above 3 {4, 5, 6}.
    # The overlap is {4, 6}, so P(E∩F) = 2/6 while P(F) = 3/6.
    p_even_given_greater_than_3 = conditional_probability(
        p_intersection=2 / 6, p_condition=3 / 6
    )
    print("Example 1: Rolling a die")
    print(f"P(Even | >3) = {p_even_given_greater_than_3}")  # expected: 2/3
    return (p_even_given_greater_than_3,)
@app.cell
def _(conditional_probability):
    # Card draw: E = heart, F = face card (J, Q, K).
    # Three of the twelve face cards are hearts: P(E∩F) = 3/52, P(F) = 12/52.
    p_heart_given_face = conditional_probability(
        p_intersection=3 / 52, p_condition=12 / 52
    )
    print("\nExample 2: Drawing cards")
    print(f"P(Heart | Face card) = {p_heart_given_face}")  # expected: 1/4
    return (p_heart_given_face,)
@app.cell
def _(conditional_probability):
    # Grades: E = earning an A, F = studying more than 3 hours,
    # with P(E∩F) = 0.24 and P(F) = 0.40.
    p_a_given_study = conditional_probability(
        p_intersection=0.24, p_condition=0.40
    )
    print("\nExample 3: Student grades")
    print(f"P(A | Studied >3hrs) = {p_a_given_study}")  # expected: 0.6
    return (p_a_given_study,)
@app.cell
def _(conditional_probability):
    # Weather: E = raining, F = cloudy,
    # with P(E∩F) = 0.15 and P(F) = 0.30.
    p_rain_given_cloudy = conditional_probability(
        p_intersection=0.15, p_condition=0.30
    )
    print("\nExample 4: Weather")
    print(f"P(Rain | Cloudy) = {p_rain_given_cloudy}")  # expected: 0.5
    return (p_rain_given_cloudy,)
@app.cell
def _(conditional_probability):
    # Demonstrate the two validation errors raised by conditional_probability.
    print("\nExample 5: Error cases")

    # Conditioning event has probability zero.
    try:
        conditional_probability(0.5, 0)
    except ValueError as err:
        print(f"Error 1: {err}")

    # Intersection probability exceeds the conditioning probability.
    try:
        conditional_probability(0.7, 0.5)
    except ValueError as err:
        print(f"Error 2: {err}")
    return
@app.cell(hide_code=True)
def _(mo):
    # Explains that the probability axioms still hold after conditioning,
    # with a table of the conditioned versions of each rule.
    mo.md(
        r"""
        ## The Conditional Paradigm
        When we condition on an event, we enter a new probability universe. In this universe:
        1. All probability axioms still hold
        2. We must consistently condition on the same event
        3. Our sample space becomes the conditioning event
        Here's how our familiar probability rules look when conditioned on event $G$:
        | Rule | Original | Conditioned on $G$ |
        |------|----------|-------------------|
        | Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E \mid G) \leq 1$ |
        | Axiom 2 | $P(S) = 1$ | $P(S \mid G) = 1$ |
        | Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F \mid G) = P(E \mid G) + P(F \mid G)$ |
        | Complement | $P(E^C) = 1 - P(E)$ | $P(E^C \mid G) = 1 - P(E \mid G)$ |
        *_For mutually exclusive events_
        """
    )
    return
@app.cell(hide_code=True)
def _(mo):
    # Introduces conditioning on multiple events, P(E|F,G).
    mo.md(
        r"""
        ## Multiple Conditions
        We can condition on multiple events. The notation $P(E \mid F,G)$ means "_the probability of $E$
        occurring, given that both $F$ and $G$ have occurred._"
        The conditional probability formula still holds in the universe where $G$ has occurred:
        $$P(E \mid F,G) = \frac{P(E \cap F \mid G)}{P(F \mid G)}$$
        This is a powerful extension that allows us to update our probabilities as we receive
        multiple pieces of information.
        """
    )
    return
@app.cell
def _():
    def multiple_conditional_probability(
        p_intersection_all, p_intersection_conditions, p_condition
    ):
        """Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G).

        Args:
            p_intersection_all: P(E∩F∩G).
            p_intersection_conditions: P(F∩G).
            p_condition: P(G).

        Returns:
            P(E|F,G) as a float.

        Raises:
            ValueError: if any argument is outside [0, 1], if P(G) or
                P(F∩G) is zero, if P(E∩F∩G) > P(F∩G), or if
                P(F∩G) > P(G).
        """
        # New guard: reject values that are not valid probabilities.
        if not all(
            0 <= p <= 1
            for p in (p_intersection_all, p_intersection_conditions, p_condition)
        ):
            raise ValueError("Probabilities must be between 0 and 1")
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection_conditions == 0:
            raise ValueError(
                "Cannot condition on an impossible combination of events"
            )
        if p_intersection_all > p_intersection_conditions:
            raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")
        # New guard: F∩G is a subset of G, so P(F∩G) can never exceed P(G).
        if p_intersection_conditions > p_condition:
            raise ValueError("P(F∩G) cannot be greater than P(G)")
        return p_intersection_all / p_intersection_conditions

    return (multiple_conditional_probability,)
@app.cell
def _(multiple_conditional_probability):
    # College admissions: E = admitted, F = high GPA, G = good test scores.
    # Given: P(E∩F∩G) = 0.15 and P(F∩G) = 0.25, with P(G) = 0.25.
    p_admit_given_both = multiple_conditional_probability(
        p_intersection_all=0.15,
        p_intersection_conditions=0.25,
        p_condition=0.25,
    )
    print("College Admissions Example:")
    print(
        f"P(Admitted | High GPA, Good Scores) = {p_admit_given_both}"
    )  # expected: 0.6

    # Invalid input: P(E∩F∩G) larger than P(F∩G) must raise.
    try:
        multiple_conditional_probability(0.3, 0.2, 0.2)
    except ValueError as err:
        print(f"\nError case: {err}")
    return (p_admit_given_both,)
@app.cell(hide_code=True)
def _(mo):
    # Self-check quiz rendered as collapsible <details> blocks.
    mo.md(
        r"""
        ## 🤔 Test Your Understanding
        Which of these statements about conditional probability are true?
        <details>
        <summary>Knowing F occurred always decreases the probability of E</summary>
        ❌ False! Conditioning on F can either increase or decrease P(E), depending on how E and F are related.
        </details>
        <details>
        <summary>P(E|F) represents entering a new probability universe where F has occurred</summary>
        ✅ True! We restrict ourselves to only the outcomes where F occurred, making F our new sample space.
        </details>
        <details>
        <summary>If P(E|F) = P(E), then E and F must be the same event</summary>
        ❌ False! This actually means E and F are independent - knowing one doesn't affect the other.
        </details>
        <details>
        <summary>P(E|F) can be calculated by dividing P(E∩F) by P(F)</summary>
        ✅ True! This is the fundamental definition of conditional probability.
        </details>
        """
    )
    return
@app.cell(hide_code=True)
def _(mo):
    # Summary cell. The markdown is now a raw string: it contains LaTeX
    # backslash sequences such as `\m` and `\c`, which inside a plain
    # string are invalid escape sequences (SyntaxWarning on modern Python,
    # slated to become an error). Every other markdown cell in this
    # notebook already uses r"""...""". Rendered text is unchanged, since
    # invalid escapes were previously kept literally anyway.
    mo.md(
        r"""
        ## Summary
        You've learned:
        - How conditional probability updates our beliefs with new information
        - The formula $P(E \mid F) = P(E \cap F)/P(F)$ and its intuition
        - How probability rules work in conditional universes
        - How to handle multiple conditions
        In the next lesson, we'll explore **independence** - when knowing about one event
        tells us nothing about another.
        """
    )
    return
if __name__ == "__main__":
    # Run the notebook as a standalone marimo app when executed directly.
    app.run()