Skip to content

Commit ab6669c

Browse files
lucasmoura and norvig
authored and committed
Add tests to NgramCharModel (aimacode#485)
1 parent b0b1d6f commit ab6669c

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

tests/test_text.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,72 @@ def test_text_models():
7474
assert len(P3.dictionary) == 3
7575

7676

77+
def test_char_models():
    """Check the keys and counts that NgramCharModel stores in its dictionary.

    Unigram models are checked for one key per distinct character; bigram
    and trigram models are checked against explicit expected counts, whose
    keys include the space-padded n-grams at the start of a word.
    """
    # Unigram cases: (input text, symbols expected as one-element tuple keys).
    unigram_cases = [
        ('unigram', list('unigram')),
        ('a b c', 'a b c'.split()),
    ]
    for text, symbols in unigram_cases:
        model = NgramCharModel(1, words(text))

        assert len(model.dictionary) == len(symbols)
        for symbol in symbols:
            assert tuple(symbol) in model.dictionary

    bigram_keys = [(' ', 'b'), ('b', 'i'), ('i', 'g'),
                   ('g', 'r'), ('r', 'a'), ('a', 'm')]
    trigram_keys = [(' ', ' ', 't'), (' ', 't', 'r'), ('t', 'r', 'i'),
                    ('r', 'i', 'g'), ('i', 'g', 'r'), ('g', 'r', 'a'),
                    ('r', 'a', 'm')]

    # Higher-order cases: (n, input text, expected n-gram -> count mapping).
    # Repeating the word k times is expected to give every n-gram count k.
    ngram_cases = [
        (2, 'bigram', dict.fromkeys(bigram_keys, 1)),
        (2, 'bigram bigram', dict.fromkeys(bigram_keys, 2)),
        (3, 'trigram', dict.fromkeys(trigram_keys, 1)),
        (3, 'trigram trigram trigram', dict.fromkeys(trigram_keys, 3)),
    ]
    for n, text, expected in ngram_cases:
        model = NgramCharModel(n, words(text))

        assert len(model.dictionary) == len(expected)
        for ngram, count in expected.items():
            assert ngram in model.dictionary
            assert model.dictionary[ngram] == count
141+
142+
77143
def test_viterbi_segmentation():
78144
flatland = DataFile("EN-text/flatland.txt").read()
79145
wordseq = words(flatland)

0 commit comments

Comments
 (0)