File size: 11,441 Bytes
53609e5
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
a50dbac
53609e5
 
 
 
a50dbac
53609e5
 
 
 
 
a50dbac
 
 
 
 
 
 
 
53609e5
 
 
a50dbac
53609e5
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
a50dbac
53609e5
a50dbac
 
 
 
 
 
 
 
 
 
 
 
53609e5
 
 
 
a50dbac
53609e5
 
a50dbac
 
 
 
 
 
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
 
 
a50dbac
53609e5
 
 
a50dbac
 
 
 
 
 
 
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
 
 
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
 
a50dbac
 
 
53609e5
 
 
 
a50dbac
 
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
 
 
53609e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a50dbac
53609e5
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "marimo",
#     "matplotlib==3.10.0",
#     "matplotlib-venn==1.1.1",
#     "numpy==2.2.2",
# ]
# ///

import marimo

# Presumably the marimo version that generated this file -- confirm before editing by hand.
__generated_with = "0.11.4"
# Notebook application object; every @app.cell function below is registered on it.
app = marimo.App(width="medium", app_title="Conditional Probability")


@app.cell
def _():
    # Import marimo under the alias `mo` and export it; the markdown and
    # layout cells below take `mo` as their input.
    import marimo as mo
    return (mo,)


@app.cell(hide_code=True)
def _(mo):
    # Markdown: notebook title, attribution to the companion book, and what it
    # means to condition on an event F.
    mo.md(
        r"""
        # Conditional Probability

        _This notebook is a computational companion to the book ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/), by Stanford professor Chris Piech._

        In probability theory, we often want to update our beliefs when we receive new information. 
        Conditional probability helps us formalize this process by calculating "_what is the chance of 
        event $E$ happening given that we have already observed some other event $F$?_"[<sup>1</sup>](https://chrispiech.github.io/probabilityForComputerScientists/en/part1/cond_prob/)

        When we condition on an event $F$:

        - We enter the universe where $F$ has occurred
        - Only outcomes consistent with $F$ are possible
        - Our sample space reduces to $F$
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    # Markdown: the defining formula P(E | F) = P(E ∩ F) / P(F), followed by a
    # lead-in to the visual example.
    mo.md(
        r"""
        ## Definition of Conditional Probability

        The probability of event $E$ given that event $F$ has occurred is denoted as $P(E \mid F)$ and is defined as:

        $$P(E \mid F) = \frac{P(E \cap F)}{P(F)}$$

        This formula tells us that the conditional probability is the probability of both events occurring 
        divided by the probability of the conditioning event.

        Let's start with a visual example.
        """
    )
    return


@app.cell
def _():
    # Plotting dependencies, exported to other cells through the return tuple.
    # NOTE(review): no cell in this file takes `np` as an input.
    import matplotlib.pyplot as plt
    from matplotlib_venn import venn3
    import numpy as np
    return np, plt, venn3


@app.cell(hide_code=True)
def _(mo, plt, venn3):
    # Draws a three-set Venn diagram (E, F, and the remaining outcomes) together
    # with a dashed box for the sample space, then stacks the figure above a
    # markdown walk-through of P(E | F) for the same counts.
    #
    # Loop variables carry a leading underscore so they stay local to this cell;
    # the previous loop variable was named `id`, which shadowed the builtin and
    # was exported to the rest of the notebook through the return tuple.

    # Fill colour per region, keyed by matplotlib-venn's region id.
    _region_colors = {
        "100": "#ffcccc",  # E only: light red
        "010": "#ccffcc",  # F only: light green
        "110": "#e6ffe6",  # E and F: lighter green for the intersection
        "001": "white",  # outcomes in neither E nor F
    }

    plt.figure(figsize=(10, 3))

    # Dashed outline standing for the sample space, added before the circles
    rect = plt.Rectangle((-2, -2), 4, 4, fill=False, color="gray", linestyle="--")
    plt.gca().add_patch(rect)

    # Axis limits chosen to contain the whole rectangle.
    # NOTE(review): venn3 may adjust the axes limits itself -- confirm these
    # limits survive the call below.
    plt.xlim(-2.5, 2.5)
    plt.ylim(-2.5, 2.5)

    # For venn3, subsets order is: (100, 010, 110, 001, 101, 011, 111)
    # Representing: (A, B, AB, C, AC, BC, ABC)
    v = venn3(subsets=(30, 20, 10, 40, 0, 0, 0), set_labels=("E", "F", "Rest"))

    # Recolour only the regions that were actually drawn
    if v:
        for _region_id, _color in _region_colors.items():
            _patch = v.get_patch_by_id(_region_id)
            if _patch is not None:
                _patch.set_color(_color)

    plt.title("Conditional Probability in Sample Space")

    # Remove ticks but keep the box visible
    plt.gca().set_yticks([])
    plt.gca().set_xticks([])
    plt.axis("on")

    # Add sample space annotation with arrow
    plt.annotate(
        "Sample Space (100)",
        xy=(-1.5, 1.5),
        xytext=(-2.2, 2),
        bbox=dict(boxstyle="round,pad=0.5", fc="white", ec="gray"),
        arrowprops=dict(arrowstyle="->"),
    )

    # Add explanation
    explanation = mo.md(r"""
    ### Visual Intuition

    In our sample space of 100 outcomes:

    - Event $E$ occurs in 40 cases (red region: 30 + 10)
    - Event $F$ occurs in 30 cases (green region: 20 + 10)
    - Both events occur together in 10 cases (overlap)
    - Remaining cases: 40 (to complete sample space of 100)

    When we condition on $F$:
    $$P(E \mid F) = \frac{P(E \cap F)}{P(F)} = \frac{10}{30} = \frac{1}{3} \approx 0.33$$

    This means: When we know $F$ has occurred (restricting ourselves to the green region),
    the probability of $E$ also occurring is $\frac{1}{3}$ - as 10 out of the 30 cases in the 
    green region also belong to the red region.
    """)

    mo.vstack([mo.center(plt.gcf()), explanation])
    return explanation, rect, v


@app.cell(hide_code=True)
def _(mo):
    # Markdown: one-line lead-in to the conditional_probability helper.
    mo.md(
        r"Next, here's a function that computes $P(E \mid F)$, given $P( E \cap F)$ and $P(F)$"
    )
    return


@app.cell
def _():
    def conditional_probability(p_intersection, p_condition):
        """Return P(E | F) = P(E ∩ F) / P(F).

        Args:
            p_intersection: P(E ∩ F), the probability that both events occur.
            p_condition: P(F), the probability of the conditioning event.

        Returns:
            The quotient ``p_intersection / p_condition``.

        Raises:
            ValueError: If either argument lies outside [0, 1] (NaN included),
                if ``p_condition`` is 0, or if ``p_intersection`` exceeds
                ``p_condition``.
        """
        # Phrased as "not (0 <= x <= 1)" so that NaN, which fails every
        # comparison, is rejected along with out-of-range numbers.
        for label, value in (
            ("P(E∩F)", p_intersection),
            ("P(F)", p_condition),
        ):
            if not 0 <= value <= 1:
                raise ValueError(f"{label} must be between 0 and 1, got {value}")
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection > p_condition:
            raise ValueError("P(E∩F) cannot be greater than P(F)")

        return p_intersection / p_condition
    return (conditional_probability,)


@app.cell
def _(conditional_probability):
    # Example 1 -- one roll of a six-sided die.
    #   E: the roll is even            -> 2, 4, 6
    #   F: the roll is greater than 3  -> 4, 5, 6
    #   E and F together               -> 4, 6
    _p_both = 2 / 6  # P(E ∩ F)
    _p_given = 3 / 6  # P(F)
    p_even_given_greater_than_3 = conditional_probability(_p_both, _p_given)

    print("Example 1: Rolling a die")
    print(f"P(Even | >3) = {p_even_given_greater_than_3}")  # expected: 2/3
    return (p_even_given_greater_than_3,)


@app.cell
def _(conditional_probability):
    # Example 2 -- drawing one card from a 52-card deck.
    #   E: the card is a Heart
    #   F: the card is a Face card (J, Q, K) -> 12 cards
    #   E and F together                     -> 3 cards
    _p_both = 3 / 52  # P(E ∩ F)
    _p_given = 12 / 52  # P(F)
    p_heart_given_face = conditional_probability(_p_both, _p_given)

    print("\nExample 2: Drawing cards")
    print(f"P(Heart | Face card) = {p_heart_given_face}")  # expected: 1/4
    return (p_heart_given_face,)


@app.cell
def _(conditional_probability):
    # Example 3 -- student grades.
    #   E: the student gets an A
    #   F: the student studies more than 3 hours
    _p_both = 0.24  # P(E ∩ F)
    _p_given = 0.40  # P(F)
    p_a_given_study = conditional_probability(_p_both, _p_given)

    print("\nExample 3: Student grades")
    print(f"P(A | Studied >3hrs) = {p_a_given_study}")  # expected: 0.6
    return (p_a_given_study,)


@app.cell
def _(conditional_probability):
    # Example 4 -- weather.
    #   E: it is raining
    #   F: it is cloudy
    _p_both = 0.15  # P(E ∩ F)
    _p_given = 0.30  # P(F)
    p_rain_given_cloudy = conditional_probability(_p_both, _p_given)

    print("\nExample 4: Weather")
    print(f"P(Rain | Cloudy) = {p_rain_given_cloudy}")  # expected: 0.5
    return (p_rain_given_cloudy,)


@app.cell
def _(conditional_probability):
    # Example 5 -- two inputs that conditional_probability rejects with a
    # ValueError; each message is printed instead of propagating.
    print("\nExample 5: Error cases")

    # Case 1: P(F) = 0, so there is no event to condition on
    _p_both, _p_given = 0.5, 0
    try:
        conditional_probability(_p_both, _p_given)
    except ValueError as e:
        print(f"Error 1: {e}")

    # Case 2: P(E ∩ F) = 0.7 is larger than P(F) = 0.5
    _p_both, _p_given = 0.7, 0.5
    try:
        conditional_probability(_p_both, _p_given)
    except ValueError as e:
        print(f"Error 2: {e}")
    return


@app.cell(hide_code=True)
def _(mo):
    # Markdown: table comparing the probability axioms and the complement rule
    # with their counterparts conditioned on an event G.
    mo.md(
        r"""
        ## The Conditional Paradigm

        When we condition on an event, we enter a new probability universe. In this universe:

        1. All probability axioms still hold
        2. We must consistently condition on the same event
        3. Our sample space becomes the conditioning event

        Here's how our familiar probability rules look when conditioned on event $G$:

        | Rule | Original | Conditioned on $G$ |
        |------|----------|-------------------|
        | Axiom 1 | $0 \leq P(E) \leq 1$ | $0 \leq P(E \mid G) \leq 1$ |
        | Axiom 2 | $P(S) = 1$ | $P(S \mid G) = 1$ |
        | Axiom 3* | $P(E \cup F) = P(E) + P(F)$ | $P(E \cup F \mid G) = P(E \mid G) + P(F \mid G)$ |
        | Complement | $P(E^C) = 1 - P(E)$ | $P(E^C \mid G) = 1 - P(E \mid G)$ |

        *_For mutually exclusive events_
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    # Markdown: notation and formula for conditioning on two events, P(E | F, G).
    mo.md(
        r"""
        ## Multiple Conditions

        We can condition on multiple events. The notation $P(E \mid F,G)$ means "_the probability of $E$ 
        occurring, given that both $F$ and $G$ have occurred._"

        The conditional probability formula still holds in the universe where $G$ has occurred:

        $$P(E \mid F,G) = \frac{P(E \cap F \mid G)}{P(F \mid G)}$$

        This is a powerful extension that allows us to update our probabilities as we receive 
        multiple pieces of information.
        """
    )
    return


@app.cell
def _():
    def multiple_conditional_probability(
        p_intersection_all, p_intersection_conditions, p_condition
    ):
        """Calculate P(E|F,G) = P(E∩F|G)/P(F|G) = P(E∩F∩G)/P(F∩G).

        Args:
            p_intersection_all: P(E∩F∩G), the probability that all three
                events occur.
            p_intersection_conditions: P(F∩G), the probability that both
                conditioning events occur.
            p_condition: P(G). It cancels out of the quotient, so it is used
                only to check that the inputs are consistent.

        Returns:
            The quotient ``p_intersection_all / p_intersection_conditions``.

        Raises:
            ValueError: If any argument lies outside [0, 1] (NaN included), if
                ``p_condition`` or ``p_intersection_conditions`` is 0, or if
                the inputs violate P(E∩F∩G) <= P(F∩G) <= P(G).
        """
        # Phrased as "not (0 <= x <= 1)" so that NaN, which fails every
        # comparison, is rejected along with out-of-range numbers.
        for label, value in (
            ("P(E∩F∩G)", p_intersection_all),
            ("P(F∩G)", p_intersection_conditions),
            ("P(G)", p_condition),
        ):
            if not 0 <= value <= 1:
                raise ValueError(f"{label} must be between 0 and 1, got {value}")
        if p_condition == 0:
            raise ValueError("Cannot condition on an impossible event")
        if p_intersection_conditions == 0:
            raise ValueError(
                "Cannot condition on an impossible combination of events"
            )
        # F∩G is a subset of G, so its probability can never exceed P(G).
        if p_intersection_conditions > p_condition:
            raise ValueError("P(F∩G) cannot be greater than P(G)")
        if p_intersection_all > p_intersection_conditions:
            raise ValueError("P(E∩F∩G) cannot be greater than P(F∩G)")

        return p_intersection_all / p_intersection_conditions
    return (multiple_conditional_probability,)


@app.cell
def _(multiple_conditional_probability):
    # College admissions example.
    #   E: the applicant is admitted
    #   F: the applicant has a high GPA
    #   G: the applicant has good test scores
    _p_all_three = 0.15  # P(E∩F∩G) = P(Admitted ∩ HighGPA ∩ GoodScore)
    _p_both_conditions = 0.25  # P(F∩G) = P(HighGPA ∩ GoodScore)

    # The third argument (p_condition) is given the same value as P(F∩G) here.
    p_admit_given_both = multiple_conditional_probability(
        _p_all_three, _p_both_conditions, 0.25
    )
    print("College Admissions Example:")
    print(
        f"P(Admitted | High GPA, Good Scores) = {p_admit_given_both}"
    )  # expected: 0.6

    # Error case: the first argument, P(E∩F∩G) = 0.3, is larger than the
    # second, P(F∩G) = 0.2, so the helper raises ValueError.
    try:
        multiple_conditional_probability(0.3, 0.2, 0.2)
    except ValueError as e:
        print(f"\nError case: {e}")
    return (p_admit_given_both,)


@app.cell(hide_code=True)
def _(mo):
    # Markdown: four true/false statements, each inside a collapsible <details>
    # element whose body holds the answer.
    mo.md(
        r"""
        ## 🤔 Test Your Understanding

        Which of these statements about conditional probability are true?

        <details>
        <summary>Knowing F occurred always decreases the probability of E</summary>
        ❌ False! Conditioning on F can either increase or decrease P(E), depending on how E and F are related.
        </details>

        <details>
        <summary>P(E|F) represents entering a new probability universe where F has occurred</summary>
        ✅ True! We restrict ourselves to only the outcomes where F occurred, making F our new sample space.
        </details>

        <details>
        <summary>If P(E|F) = P(E), then E and F must be the same event</summary>
        ❌ False! This actually means E and F are independent - knowing one doesn't affect the other.
        </details>

        <details>
        <summary>P(E|F) can be calculated by dividing P(E∩F) by P(F)</summary>
        ✅ True! This is the fundamental definition of conditional probability.
        </details>
        """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    # Markdown: closing summary and pointer to the next lesson.
    # The literal is a raw string because the LaTeX below contains backslashes
    # (\mid, \cap) that are not valid escape sequences in a regular string
    # literal; the text itself is unchanged.
    mo.md(
        r"""
        ## Summary

        You've learned:

        - How conditional probability updates our beliefs with new information
        - The formula $P(E \mid F) = P(E \cap F)/P(F)$ and its intuition
        - How probability rules work in conditional universes
        - How to handle multiple conditions

        In the next lesson, we'll explore **independence** - when knowing about one event 
        tells us nothing about another.
        """
    )
    return


# Run the notebook app when this file is executed directly rather than imported.
if __name__ == "__main__":
    app.run()