@@ -116,6 +116,7 @@ def __init__(self, name, rules, lexicon):
         self.rules = rules
         self.lexicon = lexicon
         self.categories = defaultdict(list)
+
         for lhs in lexicon:
             for word, prob in lexicon[lhs]:
                 self.categories[word].append((lhs, prob))
@@ -128,6 +129,16 @@ def isa(self, word, cat):
128129 """Return True iff word is of category cat"""
129130 return cat in [c for c , _ in self .categories [word ]]
130131
132+ def cnf_rules (self ):
133+ """Returns the tuple (X, Y, Z, p) for rules in the form:
134+ X -> Y Z [p]"""
135+ cnf = []
136+ for X , rules in self .rules .items ():
137+ for (Y , Z ), p in rules :
138+ cnf .append ((X , Y , Z , p ))
139+
140+ return cnf
141+
131142 def generate_random (self , S = 'S' ):
132143 """Replace each token in S by a random entry in grammar (recursively).
133144 Returns a tuple of (sentence, probability)."""
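
The new cnf_rules helper flattens each binary rule X -> Y Z [p] into a 4-tuple. A minimal sketch of how it could be exercised (not part of this commit; it assumes the E_Prob_Chomsky grammar added further down and that ProbRules stores each right-hand side as a (symbols, probability) pair):

    # hypothetical quick check, run after the grammars below are defined
    for X, Y, Z, p in E_Prob_Chomsky.cnf_rules():
        print(X, '->', Y, Z, p)   # e.g. S -> NP VP 1.0
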
@@ -189,11 +200,48 @@ def __repr__(self):
                      V='saw | liked | feel'
                      ))
 
-E_NP_ = Grammar('E_NP_',  # another trivial grammar for testing
+E_NP_ = Grammar('E_NP_',  # Another Trivial Grammar for testing
                 Rules(NP='Adj NP | N'),
                 Lexicon(Adj='happy | handsome | hairy',
                         N='man'))
 
+E_Prob = ProbGrammar('E_Prob',    # The Probabilistic Grammar from the notebook
+                     ProbRules(
+                        S="NP VP [0.6] | S Conjuction S [0.4]",
+                        NP="Pronoun [0.2] | Name [0.05] | Noun [0.2] | Article Noun [0.15] \
+                            | Article Adjs Noun [0.1] | Digit [0.05] | NP PP [0.15] | NP RelClause [0.1]",
+                        VP="Verb [0.3] | VP NP [0.2] | VP Adjective [0.25] | VP PP [0.15] | VP Adverb [0.1]",
+                        Adjs="Adjective [0.5] | Adjective Adjs [0.5]",
+                        PP="Preposition NP [1]",
+                        RelClause="RelPro VP [1]"
+                     ),
+                     ProbLexicon(
+                        Verb="is [0.5] | say [0.3] | are [0.2]",
+                        Noun="robot [0.4] | sheep [0.4] | fence [0.2]",
+                        Adjective="good [0.5] | new [0.2] | sad [0.3]",
+                        Adverb="here [0.6] | lightly [0.1] | now [0.3]",
+                        Pronoun="me [0.3] | you [0.4] | he [0.3]",
+                        RelPro="that [0.5] | who [0.3] | which [0.2]",
+                        Name="john [0.4] | mary [0.4] | peter [0.2]",
+                        Article="the [0.5] | a [0.25] | an [0.25]",
+                        Preposition="to [0.4] | in [0.3] | at [0.3]",
+                        Conjuction="and [0.5] | or [0.2] | but [0.3]",
+                        Digit="0 [0.35] | 1 [0.35] | 2 [0.3]"
+                     ))
+
+E_Prob_Chomsky = ProbGrammar('E_Prob_Chomsky',    # A Probabilistic Grammar in CNF
+                             ProbRules(
+                                S='NP VP [1]',
+                                NP='Article Noun [0.6] | Adjective Noun [0.4]',
+                                VP='Verb NP [0.5] | Verb Adjective [0.5]',
+                             ),
+                             ProbLexicon(
+                                Article='the [0.5] | a [0.25] | an [0.25]',
+                                Noun='robot [0.4] | sheep [0.4] | fence [0.2]',
+                                Adjective='good [0.5] | new [0.2] | sad [0.3]',
+                                Verb='is [0.5] | say [0.3] | are [0.2]'
+                             ))
+
 
 # ______________________________________________________________________________
 # Chart Parsing
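
As a quick sanity check (an assumption, not part of the commit), the new E_Prob grammar plugs into the existing ProbGrammar.generate_random, which returns a (sentence, probability) pair:

    sentence, prob = E_Prob.generate_random('S')
    print(sentence, prob)   # a random sentence string and its joint probability
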
@@ -236,7 +284,7 @@ def parse(self, words, S='S'):
         return self.chart
 
     def add_edge(self, edge):
-        "Add edge to chart, and see if it extends or predicts another edge."
+        """Add edge to chart, and see if it extends or predicts another edge."""
         start, end, lhs, found, expects = edge
         if edge not in self.chart[end]:
             self.chart[end].append(edge)
@@ -248,21 +296,21 @@ def add_edge(self, edge):
                 self.predictor(edge)
 
     def scanner(self, j, word):
-        "For each edge expecting a word of this category here, extend the edge."
+        """For each edge expecting a word of this category here, extend the edge."""
         for (i, j, A, alpha, Bb) in self.chart[j]:
             if Bb and self.grammar.isa(word, Bb[0]):
                 self.add_edge([i, j + 1, A, alpha + [(Bb[0], word)], Bb[1:]])
 
     def predictor(self, edge):
-        "Add to chart any rules for B that could help extend this edge."
+        """Add to chart any rules for B that could help extend this edge."""
         (i, j, A, alpha, Bb) = edge
         B = Bb[0]
         if B in self.grammar.rules:
             for rhs in self.grammar.rewrites_for(B):
                 self.add_edge([j, j, B, [], rhs])
 
     def extender(self, edge):
-        "See what edges can be extended by this edge."
+        """See what edges can be extended by this edge."""
         (j, k, B, _, _) = edge
         for (i, j, A, alpha, B1b) in self.chart[j]:
             if B1b and B == B1b[0]:
@@ -273,23 +321,26 @@ def extender(self, edge):
 # CYK Parsing
 
 def CYK_parse(words, grammar):
-    "[Figure 23.5]"
+    """[Figure 23.5]"""
     # We use 0-based indexing instead of the book's 1-based.
     N = len(words)
     P = defaultdict(float)
+
     # Insert lexical rules for each word.
     for (i, word) in enumerate(words):
-        for (X, p) in grammar.categories[word]:  # XXX grammar.categories needs changing, above
+        for (X, p) in grammar.categories[word]:
             P[X, i, 1] = p
+
     # Combine first and second parts of right-hand sides of rules,
     # from short to long.
     for length in range(2, N + 1):
         for start in range(N - length + 1):
             for len1 in range(1, length):  # N.B. the book incorrectly has N instead of length
                 len2 = length - len1
-                for (X, Y, Z, p) in grammar.cnf_rules():  # XXX grammar needs this method
+                for (X, Y, Z, p) in grammar.cnf_rules():
                     P[X, start, length] = max(P[X, start, length],
                                               P[Y, start, len1] * P[Z, start + len1, len2] * p)
+
     return P
 
 
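
A hedged usage sketch for CYK_parse (not part of the commit): parsing a sentence with the CNF grammar above fills P with the probability of the best subtree for each (category, start, length) triple, so the entry for ('S', 0, len(words)) covers the whole sentence.

    words = 'the robot is good'.split()
    P = CYK_parse(words, E_Prob_Chomsky)
    print(P['S', 0, len(words)])   # probability of the most likely parse of the full sentence
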
@@ -395,6 +446,7 @@ def relevant_pages(query):
         hit_intersection = hit_intersection.intersection(hit_list)
     return {addr: pagesIndex[addr] for addr in hit_intersection}
 
+
 def normalize(pages):
     """Normalize divides each page's score by the sum of the squares of all
     pages' scores (separately for both the authority and hub scores).