@@ -26,6 +26,7 @@ def samples(self, n):
2626 return ' ' .join (self .sample () for i in range (n ))
2727
2828
29+
2930class NgramTextModel (CountingProbDist ):
3031
3132 """This is a discrete probability distribution over n-tuples of words.
@@ -50,12 +51,16 @@ def add(self, ngram):
5051 self .cond_prob [ngram [:- 1 ]] = CountingProbDist ()
5152 self .cond_prob [ngram [:- 1 ]].add (ngram [- 1 ])
5253
def add_empty(self, words, n):
    """Return `words` with n - 1 empty-string entries prepended.

    The padding lets a sliding window of width n start on the first real
    word. `words` must be a list; a new list is returned and the argument
    is left unmodified.
    """
    padding = [''] * (n - 1)
    return padding + words
56+
def add_sequence(self, words):
    """Add each of the tuple words[i:i+n], using a sliding window.

    The sequence is first padded through self.add_empty(words, n) so that
    the start works: the first real item gets a full window of context.
    Every window of width n over the padded sequence, including the final
    one, is then passed to self.add as a tuple.
    """
    n = self.n
    padded = self.add_empty(words, n)

    # A sequence of length m holds m - n + 1 windows of width n. Without
    # the "+ 1" the final window is skipped and the last item of every
    # sequence is never counted.
    for start in range(len(padded) - n + 1):
        self.add(tuple(padded[start:start + n]))
6065
6166 def samples (self , nwords ):
@@ -72,6 +77,15 @@ def samples(self, nwords):
7277 nminus1gram = nminus1gram [1 :] + (wn ,)
7378 return ' ' .join (output )
7479
80+
class NgramCharModel(NgramTextModel):
    """N-gram model built over the characters of individual words.

    Reuses the windowing of NgramTextModel.add_sequence, but hands it one
    word (a string) at a time and pads each word with spaces.
    """

    def add_empty(self, words, n):
        """Return the string `words` with n - 1 spaces prepended."""
        pad = ' ' * (n - 1)
        return pad + words

    def add_sequence(self, words):
        """Pass each item of `words` to the parent's add_sequence in turn."""
        add_one = super().add_sequence
        for word in words:
            add_one(word)
88+
7589# ______________________________________________________________________________
7690
7791
0 commit comments