import pytest
import nlp
-from nlp import loadPageHTML, stripRawHTML, determineInlinks, findOutlinks, onlyWikipediaURLS
+from nlp import loadPageHTML, stripRawHTML, findOutlinks, onlyWikipediaURLS
from nlp import expand_pages, relevant_pages, normalize, ConvergenceDetector, getInlinks
-from nlp import getOutlinks, Page, HITS
+from nlp import getOutlinks, Page
from nlp import Rules, Lexicon
# Clumsy imports because we want to access certain nlp.py globals explicitly, because
# they are accessed by functions within nlp.py

+
def test_rules():
    assert Rules(A="B C | D E") == {'A': [['B', 'C'], ['D', 'E']]}

@@ -27,18 +28,18 @@ def test_lexicon():
href="/wiki/TestLiving" href="/wiki/TestMan" >"""
testHTML2 = "Nothing"

-pA = Page("A", 1, 6, ["B","C","E"],["D"])
-pB = Page("B", 2, 5, ["E"],["A","C","D"])
-pC = Page("C", 3, 4, ["B","E"],["A","D"])
-pD = Page("D", 4, 3, ["A","B","C","E"],[])
-pE = Page("E", 5, 2, [],["A","B","C","D","F"])
-pF = Page("F", 6, 1, ["E"],[])
-pageDict = {pA.address:pA,pB.address:pB,pC.address:pC,
-            pD.address:pD,pE.address:pE,pF.address:pF}
+pA = Page("A", 1, 6, ["B", "C", "E"], ["D"])
+pB = Page("B", 2, 5, ["E"], ["A", "C", "D"])
+pC = Page("C", 3, 4, ["B", "E"], ["A", "D"])
+pD = Page("D", 4, 3, ["A", "B", "C", "E"], [])
+pE = Page("E", 5, 2, [], ["A", "B", "C", "D", "F"])
+pF = Page("F", 6, 1, ["E"], [])
+pageDict = {pA.address: pA, pB.address: pB, pC.address: pC,
+            pD.address: pD, pE.address: pE, pF.address: pF}
nlp.pagesIndex = pageDict
-nlp.pagesContent = {pA.address:testHTML,pB.address:testHTML2,
-                    pC.address:testHTML,pD.address:testHTML2,
-                    pE.address:testHTML,pF.address:testHTML2}
+nlp.pagesContent = {pA.address: testHTML, pB.address: testHTML2,
+                    pC.address: testHTML, pD.address: testHTML2,
+                    pE.address: testHTML, pF.address: testHTML2}
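# The six Page objects above define a small, self-consistent sample link graph:
# each page's inlinks list matches the outlinks declared by the other pages, the
# hub scores run 1..6 (A..F) and the authority scores run 6..1, which the HITS
# helper tests below rely on. Pages A, C and E share testHTML as their content
# (which evidently contains the keyword "male" queried in test_relevant_pages),
# while B, D and F get the placeholder testHTML2.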

# This test takes a long time (> 60 secs)
# def test_loadPageHTML():
@@ -50,17 +51,20 @@ def test_lexicon():
#     assert all(x in loadedPages for x in fullURLs)
#     assert all(loadedPages.get(key,"") != "" for key in addresses)

+
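# Note: this test appears to fetch the live Wikipedia page over the network, so
# it needs an internet connection and can be slow.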
def test_stripRawHTML():
    addr = "https://en.wikipedia.org/wiki/Ethics"
    aPage = loadPageHTML([addr])
    someHTML = aPage[addr]
    strippedHTML = stripRawHTML(someHTML)
    assert "<head>" not in strippedHTML and "</head>" not in strippedHTML

+
def test_determineInlinks():
    # TODO
    assert True
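    # A possible check once this is implemented (assuming nlp.determineInlinks(page)
    # returns the addresses of the pages that link to the given page):
    # assert set(nlp.determineInlinks(pA)) == set(pA.inlinks)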

+
def test_findOutlinks_wiki():
    testPage = pageDict[pA.address]
    outlinks = findOutlinks(testPage, handleURLs=onlyWikipediaURLS)
@@ -70,35 +74,39 @@ def test_findOutlinks_wiki():
# ______________________________________________________________________________
# HITS Helper Functions

+
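# expand_pages is expected to add every page that links to, or is linked from,
# a page in the given set, which is what the assertions below check.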
def test_expand_pages():
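    # ('F') is just the one-character string 'F', not a tuple, so `pages` starts
    # out containing only page F.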
    pages = {k: pageDict[k] for k in ('F')}
-    pagesTwo = {k: pageDict[k] for k in ('A','E')}
+    pagesTwo = {k: pageDict[k] for k in ('A', 'E')}
    expanded_pages = expand_pages(pages)
-    assert all(x in expanded_pages for x in ['F','E'])
-    assert all(x not in expanded_pages for x in ['A','B','C','D'])
+    assert all(x in expanded_pages for x in ['F', 'E'])
+    assert all(x not in expanded_pages for x in ['A', 'B', 'C', 'D'])
    expanded_pages = expand_pages(pagesTwo)
    print(expanded_pages)
-    assert all(x in expanded_pages for x in ['A','B','C','D','E','F'])
+    assert all(x in expanded_pages for x in ['A', 'B', 'C', 'D', 'E', 'F'])
+

def test_relevant_pages():
    pages = relevant_pages("male")
-    assert all((x in pages.keys()) for x in ['A','C','E'])
-    assert all((x not in pages) for x in ['B','D','F'])
+    assert all((x in pages.keys()) for x in ['A', 'C', 'E'])
+    assert all((x not in pages) for x in ['B', 'D', 'F'])
+

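# Note for test_normalize: 91 = 1**2 + 2**2 + ... + 6**2, i.e. the sum of the
# squared hub (and authority) scores of the six sample pages defined above.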
def test_normalize():
-    normalize( pageDict )
-    print(page.hub for addr,page in nlp.pagesIndex.items())
-    expected_hub = [1 / 91,2 / 91,3 / 91,4 / 91,5 / 91,6 / 91] # Works only for sample data above
+    normalize(pageDict)
+    print(page.hub for addr, page in nlp.pagesIndex.items())
+    expected_hub = [1 / 91, 2 / 91, 3 / 91, 4 / 91, 5 / 91, 6 / 91]  # Works only for sample data above
    expected_auth = list(reversed(expected_hub))
    assert len(expected_hub) == len(expected_auth) == len(nlp.pagesIndex)
-    assert expected_hub == [page.hub for addr,page in sorted(nlp.pagesIndex.items())]
-    assert expected_auth == [page.authority for addr,page in sorted(nlp.pagesIndex.items())]
+    assert expected_hub == [page.hub for addr, page in sorted(nlp.pagesIndex.items())]
+    assert expected_auth == [page.authority for addr, page in sorted(nlp.pagesIndex.items())]
+
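# ConvergenceDetector appears to compare the current hub/authority scores with
# those recorded on its previous call, returning True only when they are unchanged.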

def test_detectConvergence():
    # run detectConvergence once to initialise history
    convergence = ConvergenceDetector()
    convergence()
-    assert convergence() # values haven't changed so should return True
+    assert convergence()  # values haven't changed so should return True
    # make tiny increase/decrease to all values
    for _, page in nlp.pagesIndex.items():
        page.hub += 0.0003
@@ -111,17 +119,21 @@ def test_detectConvergence():
    # retest function with values. Should now return false
    assert not convergence()

+
def test_getInlinks():
    inlnks = getInlinks(pageDict['A'])
    assert sorted([page.address for page in inlnks]) == pageDict['A'].inlinks

+
def test_getOutlinks():
    outlnks = getOutlinks(pageDict['A'])
    assert sorted([page.address for page in outlnks]) == pageDict['A'].outlinks

+
def test_HITS():
    # TODO
-    assert True # leave for now
+    assert True  # leave for now
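    # A possible check once this is implemented (assuming nlp.HITS(query) runs the
    # HITS algorithm over nlp.pagesIndex, updating each page's hub and authority):
    # nlp.HITS("male")
    # assert all(p.hub >= 0 and p.authority >= 0 for p in nlp.pagesIndex.values())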
+

if __name__ == '__main__':
    pytest.main()