11from bs4 import BeautifulSoup
2+ import pathlib
23import re
34import requests
45
@@ -33,18 +34,17 @@ def get_inspection_page(**kwargs):
3334 params [key ] = val
3435 resp = requests .get (url , params = params )
3536 resp .raise_for_status ()
36- return resp .content , resp . encoding
37+ return resp .text
3738
3839
def parse_source(html):
    """Parse an HTML document into a BeautifulSoup tree.

    Args:
        html: The markup to parse. The callers visible in this file pass
            already-decoded text, so no encoding hint is forwarded.

    Returns:
        The ``BeautifulSoup`` object built from ``html``.
    """
    # NOTE(review): no parser is named here, so Beautiful Soup chooses one
    # from whatever is installed; confirm whether a specific parser should
    # be pinned for this page.
    return BeautifulSoup(html)
4243
4344
def load_inspection_page(name):
    """Return the text of a saved inspection page.

    Args:
        name: Path of the file to read (a string or path-like object).

    Returns:
        The file's full contents as a ``str``.

    Any error raised while opening or decoding the file propagates to the
    caller unchanged.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default text encoding.
    return pathlib.Path(name).read_text(encoding='utf8')
4848
4949
5050def restaurant_data_generator (html ):
@@ -60,7 +60,7 @@ def has_two_tds(elem):
6060
6161
def clean_data(td):
    """Return the text of ``td`` with surrounding punctuation removed.

    Args:
        td: An object exposing a ``text`` string attribute.

    Returns:
        ``td.text`` with any leading and trailing spaces, newlines, colons
        and hyphens stripped. Characters inside the text are left alone.
    """
    # The argument to strip() is a set of characters, not a literal prefix
    # or suffix: space, newline, ':' and '-'.
    return td.text.strip(" \n :-")
6464
6565
6666def extract_restaurant_metadata (elem ):
@@ -108,9 +108,9 @@ def get_score_data(elem):
108108 if samples :
109109 average = total / float (samples )
110110 data = {
111- u 'Average Score' : average ,
112- u 'High Score' : high_score ,
113- u 'Total Inspections' : samples
111+ 'Average Score' : average ,
112+ 'High Score' : high_score ,
113+ 'Total Inspections' : samples
114114 }
115115 return data
116116
@@ -121,13 +121,13 @@ def get_score_data(elem):
121121 'Inspection_End' : '2/1/2015' ,
122122 'Zip_Code' : '98101'
123123 }
124- # html, encoding = get_inspection_page(**use_params)
125- html , encoding = load_inspection_page ('inspection_page.html' )
126- parsed = parse_source (html , encoding )
124+ # html = get_inspection_page(**use_params)
125+ html = load_inspection_page ('inspection_page.html' )
126+ parsed = parse_source (html )
127127 content_col = parsed .find ("td" , id = "contentcol" )
128128 data_list = restaurant_data_generator (content_col )
129129 for data_div in data_list :
130130 metadata = extract_restaurant_metadata (data_div )
131131 inspection_data = get_score_data (data_div )
132132 metadata .update (inspection_data )
133- print metadata
133+ print ( metadata )
0 commit comments