Skip to content

Commit eca3b2a

Browse files
lucasmoura authored and norvig committed
Fix NgramTextModel bug (aimacode#412)
* Fix NgramTextModel bug
* Add new tests for NgramTextModel
1 parent 313fee0 commit eca3b2a

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

tests/test_text.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,32 @@ def test_text_models():
4747

4848
assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}
4949

50+
test_string = 'unigram'
51+
wordseq = words(test_string)
52+
53+
P1 = UnigramTextModel(wordseq)
54+
55+
assert P1.dictionary == {('unigram'): 1}
56+
57+
test_string = 'bigram text'
58+
wordseq = words(test_string)
59+
60+
P2 = NgramTextModel(2, wordseq)
61+
62+
assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or
63+
P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1})
64+
65+
66+
test_string = 'test trigram text'
67+
wordseq = words(test_string)
68+
69+
P3 = NgramTextModel(3, wordseq)
70+
71+
assert ('', '', 'test') in P3.dictionary
72+
assert ('', 'test', 'trigram') in P3.dictionary
73+
assert ('test', 'trigram', 'text') in P3.dictionary
74+
assert len(P3.dictionary) == 3
75+
5076

5177
def test_viterbi_segmentation():
5278
flatland = DataFile("EN-text/flatland.txt").read()

text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def add_sequence(self, words):
5555
Prefix some copies of the empty word, '', to make the start work."""
5656
n = self.n
5757
words = ['', ] * (n - 1) + words
58-
for i in range(len(words) - n):
58+
for i in range(len(words) - n + 1):
5959
self.add(tuple(words[i:i + n]))
6060

6161
def samples(self, nwords):

0 commit comments

Comments
 (0)