We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 313fee0 · commit eca3b2a — Copy full SHA for eca3b2a
tests/test_text.py
@@ -47,6 +47,32 @@ def test_text_models():
47
48
assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}
49
50
+ test_string = 'unigram'
51
+ wordseq = words(test_string)
52
+
53
+ P1 = UnigramTextModel(wordseq)
54
55
+ assert P1.dictionary == {'unigram': 1}
56
57
+ test_string = 'bigram text'
+ wordseq = words(test_string)
58
59
60
+ P2 = NgramTextModel(2, wordseq)
61
62
+ assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or
63
+ P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1})
64
65
66
+ test_string = 'test trigram text'
+ wordseq = words(test_string)
67
68
69
+ P3 = NgramTextModel(3, wordseq)
70
71
+ assert ('', '', 'test') in P3.dictionary
72
+ assert ('', 'test', 'trigram') in P3.dictionary
73
+ assert ('test', 'trigram', 'text') in P3.dictionary
74
+ assert len(P3.dictionary) == 3
75
76
77
def test_viterbi_segmentation():
78
flatland = DataFile("EN-text/flatland.txt").read()
text.py
@@ -55,7 +55,7 @@ def add_sequence(self, words):
Prefix some copies of the empty word, '', to make the start work."""
n = self.n
words = ['', ] * (n - 1) + words
- for i in range(len(words) - n):
+ for i in range(len(words) - n + 1):
self.add(tuple(words[i:i + n]))
def samples(self, nwords):
0 commit comments