99def test_text_models ():
1010 flatland = open_data ("EN-text/flatland.txt" ).read ()
1111 wordseq = words (flatland )
12- P1 = UnigramTextModel (wordseq )
13- P2 = NgramTextModel (2 , wordseq )
14- P3 = NgramTextModel (3 , wordseq )
12+ P1 = UnigramWordModel (wordseq )
13+ P2 = NgramWordModel (2 , wordseq )
14+ P3 = NgramWordModel (3 , wordseq )
1515
1616 # The most frequent entries in each model
1717 assert P1 .top (10 ) == [(2081 , 'the' ), (1479 , 'of' ), (1021 , 'and' ),
@@ -39,7 +39,6 @@ def test_text_models():
3939
4040 assert isclose (P2 ['of' , 'the' ], 0.0108 , rel_tol = 0.01 )
4141
42- assert isclose (P3 ['' , '' , 'but' ], 0.0 , rel_tol = 0.001 )
4342 assert isclose (P3 ['' , '' , 'but' ], 0.0 , rel_tol = 0.001 )
4443 assert isclose (P3 ['so' , 'as' , 'to' ], 0.000323 , rel_tol = 0.001 )
4544
@@ -50,14 +49,14 @@ def test_text_models():
5049 test_string = 'unigram'
5150 wordseq = words (test_string )
5251
53- P1 = UnigramTextModel (wordseq )
52+ P1 = UnigramWordModel (wordseq )
5453
5554 assert P1 .dictionary == {('unigram' ): 1 }
5655
5756 test_string = 'bigram text'
5857 wordseq = words (test_string )
5958
60- P2 = NgramTextModel (2 , wordseq )
59+ P2 = NgramWordModel (2 , wordseq )
6160
6261 assert (P2 .dictionary == {('' , 'bigram' ): 1 , ('bigram' , 'text' ): 1 } or
6362 P2 .dictionary == {('bigram' , 'text' ): 1 , ('' , 'bigram' ): 1 })
@@ -66,7 +65,7 @@ def test_text_models():
6665 test_string = 'test trigram text'
6766 wordseq = words (test_string )
6867
69- P3 = NgramTextModel (3 , wordseq )
68+ P3 = NgramWordModel (3 , wordseq )
7069
7170 assert ('' , '' , 'test' ) in P3 .dictionary
7271 assert ('' , 'test' , 'trigram' ) in P3 .dictionary
@@ -75,13 +74,14 @@ def test_text_models():
7574
7675
7776def test_char_models ():
78- test_string = 'unigram'
77+ test_string = 'test unigram'
7978 wordseq = words (test_string )
80- P1 = NgramCharModel ( 1 , wordseq )
79+ P1 = UnigramCharModel ( wordseq )
8180
82- assert len (P1 .dictionary ) == len (test_string )
83- for char in test_string :
84- assert tuple (char ) in P1 .dictionary
81+ expected_unigrams = {'n' : 1 , 's' : 1 , 'e' : 1 , 'i' : 1 , 'm' : 1 , 'g' : 1 , 'r' : 1 , 'a' : 1 , 't' : 2 , 'u' : 1 }
82+ assert len (P1 .dictionary ) == len (expected_unigrams )
83+ for char in test_string .replace (' ' , '' ):
84+ assert char in P1 .dictionary
8585
8686 test_string = 'a b c'
8787 wordseq = words (test_string )
@@ -143,7 +143,7 @@ def test_char_models():
143143def test_viterbi_segmentation ():
144144 flatland = open_data ("EN-text/flatland.txt" ).read ()
145145 wordseq = words (flatland )
146- P = UnigramTextModel (wordseq )
146+ P = UnigramWordModel (wordseq )
147147 text = "itiseasytoreadwordswithoutspaces"
148148
149149 s , p = viterbi_segment (text , P )
0 commit comments