11import  pytest 
22import  nlp 
3- from  nlp  import  loadPageHTML , stripRawHTML , determineInlinks ,  findOutlinks , onlyWikipediaURLS 
3+ from  nlp  import  loadPageHTML , stripRawHTML , findOutlinks , onlyWikipediaURLS 
44from  nlp  import  expand_pages , relevant_pages , normalize , ConvergenceDetector , getInlinks 
5- from  nlp  import  getOutlinks , Page ,  HITS 
5+ from  nlp  import  getOutlinks , Page 
66from  nlp  import  Rules , Lexicon 
77# Clumsy imports because we want to access certain nlp.py globals explicitly, because 
88# they are accessed by function's within nlp.py 
99
10+ 
def test_rules():
    """A grammar rule string 'B C | D E' parses into lists of alternatives."""
    expected = {'A': [['B', 'C'], ['D', 'E']]}
    assert Rules(A="B C | D E") == expected
1213
@@ -27,18 +28,18 @@ def test_lexicon():
2728            href="/wiki/TestLiving" href="/wiki/TestMan" >""" 
testHTML2 = "Nothing"

# Hand-built six-page graph used as the fixture for the HITS helper tests:
# Page(address, hub, authority, inlinks, outlinks).
pA = Page("A", 1, 6, ["B", "C", "E"], ["D"])
pB = Page("B", 2, 5, ["E"], ["A", "C", "D"])
pC = Page("C", 3, 4, ["B", "E"], ["A", "D"])
pD = Page("D", 4, 3, ["A", "B", "C", "E"], [])
pE = Page("E", 5, 2, [], ["A", "B", "C", "D", "F"])
pF = Page("F", 6, 1, ["E"], [])

_allPages = (pA, pB, pC, pD, pE, pF)
pageDict = {p.address: p for p in _allPages}
nlp.pagesIndex = pageDict
# Content alternates: A, C, E carry testHTML; B, D, F carry testHTML2.
nlp.pagesContent = {p.address: (testHTML if i % 2 == 0 else testHTML2)
                    for i, p in enumerate(_allPages)}
4243
4344# This test takes a long time (> 60 secs) 
4445# def test_loadPageHTML(): 
@@ -50,17 +51,20 @@ def test_lexicon():
5051#     assert all(x in loadedPages for x in fullURLs) 
5152#     assert all(loadedPages.get(key,"") != "" for key in addresses) 
5253
54+ 
def test_stripRawHTML():
    """stripRawHTML removes the <head> section from a fetched page.

    NOTE(review): this test performs a live network fetch of a Wikipedia page.
    """
    address = "https://en.wikipedia.org/wiki/Ethics"
    fetched = loadPageHTML([address])
    stripped = stripRawHTML(fetched[address])
    assert "<head>" not in stripped
    assert "</head>" not in stripped
5961
62+ 
def test_determineInlinks():
    """Placeholder: determineInlinks has no unit coverage yet."""
    # TODO: exercise determineInlinks against the fixture graph above.
    assert True
6366
67+ 
6468def  test_findOutlinks_wiki ():
6569    testPage  =  pageDict [pA .address ]
6670    outlinks  =  findOutlinks (testPage , handleURLs = onlyWikipediaURLS )
@@ -70,35 +74,39 @@ def test_findOutlinks_wiki():
7074# ______________________________________________________________________________ 
7175# HITS Helper Functions 
7276
77+ 
def test_expand_pages():
    """expand_pages adds every page reachable via one in/out-link hop."""
    # BUG FIX: ('F') is just the string 'F', not a tuple — it only worked
    # because iterating a 1-char string yields that char. Use ('F',).
    pages = {k: pageDict[k] for k in ('F',)}
    pagesTwo = {k: pageDict[k] for k in ('A', 'E')}

    expanded_pages = expand_pages(pages)
    # F links only to/from E, so one expansion yields exactly {E, F}.
    assert all(x in expanded_pages for x in ['F', 'E'])
    assert all(x not in expanded_pages for x in ['A', 'B', 'C', 'D'])

    # Expanding from {A, E} reaches the whole sample graph.
    expanded_pages = expand_pages(pagesTwo)
    assert all(x in expanded_pages for x in ['A', 'B', 'C', 'D', 'E', 'F'])
87+ 
8288
def test_relevant_pages():
    """relevant_pages keeps only the pages whose content matches the query.

    Pages A, C, E share testHTML (presumably containing 'male' — see the
    fixture content mapping above); B, D, F hold testHTML2 ("Nothing").
    """
    pages = relevant_pages("male")
    # Idiom fix: use `x in pages` consistently instead of mixing with
    # the redundant `x in pages.keys()`.
    assert all(x in pages for x in ['A', 'C', 'E'])
    assert all(x not in pages for x in ['B', 'D', 'F'])
93+ 
8794
def test_normalize():
    """After normalize, hub/authority scores take known values for the fixture graph."""
    normalize(pageDict)
    # BUG FIX: the original printed a bare generator expression, which shows
    # '<generator object ...>' instead of the hub values. Materialize it.
    print([page.hub for addr, page in nlp.pagesIndex.items()])
    expected_hub = [1 / 91, 2 / 91, 3 / 91, 4 / 91, 5 / 91, 6 / 91]  # valid only for the sample data above
    expected_auth = list(reversed(expected_hub))
    assert len(expected_hub) == len(expected_auth) == len(nlp.pagesIndex)
    assert expected_hub == [page.hub for addr, page in sorted(nlp.pagesIndex.items())]
    assert expected_auth == [page.authority for addr, page in sorted(nlp.pagesIndex.items())]
103+ 
96104
97105def  test_detectConvergence ():
98106    # run detectConvergence once to initialise history 
99107    convergence  =  ConvergenceDetector ()
100108    convergence ()
101-     assert  convergence () # values haven't changed so should return True 
109+     assert  convergence ()   # values haven't changed so should return True 
102110    # make tiny increase/decrease to all values 
103111    for  _ , page  in  nlp .pagesIndex .items ():
104112        page .hub  +=  0.0003 
@@ -111,17 +119,21 @@ def test_detectConvergence():
111119    # retest function with values. Should now return false 
112120    assert  not  convergence ()
113121
122+ 
def test_getInlinks():
    """getInlinks on page A yields exactly the addresses in A.inlinks."""
    addresses = [page.address for page in getInlinks(pageDict['A'])]
    assert sorted(addresses) == pageDict['A'].inlinks
117126
127+ 
def test_getOutlinks():
    """getOutlinks on page A yields exactly the addresses in A.outlinks."""
    addresses = [page.address for page in getOutlinks(pageDict['A'])]
    assert sorted(addresses) == pageDict['A'].outlinks
121131
132+ 
def test_HITS():
    """Placeholder: the HITS algorithm itself has no unit coverage yet."""
    # TODO: assert hub/authority convergence once a deterministic fixture exists.
    assert True
136+ 
125137
# Allow running this test module directly (outside a plain `pytest` invocation).
if __name__ == '__main__':
    pytest.main()
# (removed web-scrape artifact: "0 commit comments")