File size: 1,373 Bytes
d916065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import unittest

import pytest

from nltk import ConditionalFreqDist, tokenize


class TestEmptyCondFreq(unittest.TestCase):
    """Condition-tracking checks for ``ConditionalFreqDist``.

    The first two tests query an empty distribution for a condition it does
    not hold and verify that its list of conditions stays empty; the last
    one verifies that incrementing through indexing still adds conditions.
    """

    def _assert_no_conditions(self, cfd):
        """Assert that *cfd* reports an empty list of conditions."""
        self.assertEqual(cfd.conditions(), [])

    def test_tabulate(self):
        """Tabulating an unknown condition raises and leaves the CFD empty."""
        cfd = ConditionalFreqDist()
        self._assert_no_conditions(cfd)

        # Asking for a nonexistent key must not insert it as a side effect.
        with pytest.raises(ValueError):
            cfd.tabulate(conditions="BUG")

        self._assert_no_conditions(cfd)

    def test_plot(self):
        """Plotting an unknown condition leaves the CFD empty."""
        cfd = ConditionalFreqDist()
        self._assert_no_conditions(cfd)

        # Asking for a nonexistent key must not insert it as a side effect.
        cfd.plot(conditions=["BUG"])

        self._assert_no_conditions(cfd)

    def test_increment(self):
        """Ordinary mutation through indexing keeps working."""
        sentence = "cow cat mouse cat tiger"
        by_length = ConditionalFreqDist()

        # Use each token's length as its condition.
        for token in tokenize.word_tokenize(sentence):
            by_length[len(token)][token] += 1

        self.assertEqual(by_length.conditions(), [3, 5])

        # A condition that has never been seen can still be incremented ...
        by_length[2]["hi"] += 1
        # ... which registers the new condition ...
        self.assertCountEqual(by_length.conditions(), [3, 5, 2])
        # ... and moves the sample's count from 0 (unseen) to 1.
        hi_count = by_length[2]["hi"]
        self.assertEqual(hi_count, 1)