TheCDC
diff --git a/tests/test_text.py b/tests/test_text.py
Lines changed: 26 additions & 0 deletions
diff --git a/text.py b/text.py
Lines changed: 1 addition & 1 deletion
@@ -47,6 +47,32 @@ def test_text_models():
 
     assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}
 
+    test_string = 'unigram'
+    wordseq = words(test_string)
+
+    P1 = UnigramTextModel(wordseq)
+
+    assert P1.dictionary == {('unigram'): 1}
+
+    test_string = 'bigram text'
+    wordseq = words(test_string)
+
+    P2 = NgramTextModel(2, wordseq)
+
+    assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or
+            P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1})
+
+
+    test_string = 'test trigram text'
+    wordseq = words(test_string)
+
+    P3 = NgramTextModel(3, wordseq)
+
+    assert ('', '', 'test') in P3.dictionary
+    assert ('', 'test', 'trigram') in P3.dictionary
+    assert ('test', 'trigram', 'text') in P3.dictionary
+    assert len(P3.dictionary) == 3
+
 
 def test_viterbi_segmentation():
     flatland = DataFile("EN-text/flatland.txt").read()
 
@@ -55,7 +55,7 @@ def add_sequence(self, words):
         Prefix some copies of the empty word, '', to make the start work."""
         n = self.n
         words = ['', ] * (n - 1) + words
-        for i in range(len(words) - n):
+        for i in range(len(words) - n + 1):
             self.add(tuple(words[i:i + n]))
 
     def samples(self, nwords):