Skip to content

Commit e1fca75

Browse files
committed
update mashup steps to meet the py3 standard
1 parent 5400949 commit e1fca75

File tree

5 files changed

+53
-53
lines changed

5 files changed

+53
-53
lines changed

resources/session07/mashup_1.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -32,20 +32,20 @@ def get_inspection_page(**kwargs):
3232
params[key] = val
3333
resp = requests.get(url, params=params)
3434
resp.raise_for_status()
35-
return resp.content, resp.encoding
35+
return resp.text
3636

3737

38-
def parse_source(html, encoding='utf-8'):
39-
parsed = BeautifulSoup(html, from_encoding=encoding)
38+
def parse_source(html):
39+
parsed = BeautifulSoup(html)
4040
return parsed
4141

4242

4343
if __name__ == '__main__':
4444
use_params = {
45-
'Inspection_Start': '2/1/2013',
46-
'Inspection_End': '2/1/2015',
45+
'Inspection_Start': '2/1/2014',
46+
'Inspection_End': '2/1/2016',
4747
'Zip_Code': '98101'
4848
}
49-
html, encoding = get_inspection_page(**use_params)
50-
parsed = parse_source(html, encoding)
51-
print parsed.prettify(encoding=encoding)
49+
html = get_inspection_page(**use_params)
50+
parsed = parse_source(html)
51+
print(parsed.prettify())

resources/session07/mashup_2.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from bs4 import BeautifulSoup
2+
import pathlib
23
import re
34
import requests
45

@@ -33,18 +34,17 @@ def get_inspection_page(**kwargs):
3334
params[key] = val
3435
resp = requests.get(url, params=params)
3536
resp.raise_for_status()
36-
return resp.content, resp.encoding
37+
return resp.text
3738

3839

39-
def parse_source(html, encoding='utf-8'):
40-
parsed = BeautifulSoup(html, from_encoding=encoding)
40+
def parse_source(html):
41+
parsed = BeautifulSoup(html)
4142
return parsed
4243

4344

4445
def load_inspection_page(name):
45-
with open(name, 'r') as fh:
46-
content = fh.read()
47-
return content, 'utf-8'
46+
file_path = pathlib.Path(name)
47+
return file_path.read_text(encoding='utf8')
4848

4949

5050
def restaurant_data_generator(html):
@@ -58,9 +58,9 @@ def restaurant_data_generator(html):
5858
'Inspection_End': '2/1/2015',
5959
'Zip_Code': '98101'
6060
}
61-
# html, encoding = get_inspection_page(**use_params)
62-
html, encoding = load_inspection_page('inspection_page.html')
63-
parsed = parse_source(html, encoding)
61+
# html = get_inspection_page(**use_params)
62+
html = load_inspection_page('inspection_page.html')
63+
parsed = parse_source(html)
6464
content_col = parsed.find("td", id="contentcol")
6565
data_list = restaurant_data_generator(content_col)
66-
print data_list[0].prettify()
66+
print(data_list[0].prettify())

resources/session07/mashup_3.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from bs4 import BeautifulSoup
2+
import pathlib
23
import re
34
import requests
45

@@ -33,18 +34,17 @@ def get_inspection_page(**kwargs):
3334
params[key] = val
3435
resp = requests.get(url, params=params)
3536
resp.raise_for_status()
36-
return resp.content, resp.encoding
37+
return resp.text
3738

3839

39-
def parse_source(html, encoding='utf-8'):
40-
parsed = BeautifulSoup(html, from_encoding=encoding)
40+
def parse_source(html):
41+
parsed = BeautifulSoup(html)
4142
return parsed
4243

4344

4445
def load_inspection_page(name):
45-
with open(name, 'r') as fh:
46-
content = fh.read()
47-
return content, 'utf-8'
46+
file_path = pathlib.Path(name)
47+
return file_path.read_text(encoding='utf8')
4848

4949

5050
def restaurant_data_generator(html):
@@ -60,7 +60,7 @@ def has_two_tds(elem):
6060

6161

6262
def clean_data(td):
63-
return unicode(td.text).strip(" \n:-")
63+
return td.text.strip(" \n:-")
6464

6565

6666
def extract_restaurant_metadata(elem):
@@ -83,11 +83,11 @@ def extract_restaurant_metadata(elem):
8383
'Inspection_End': '2/1/2015',
8484
'Zip_Code': '98101'
8585
}
86-
# html, encoding = get_inspection_page(**use_params)
87-
html, encoding = load_inspection_page('inspection_page.html')
88-
parsed = parse_source(html, encoding)
86+
# html = get_inspection_page(**use_params)
87+
html = load_inspection_page('inspection_page.html')
88+
parsed = parse_source(html)
8989
content_col = parsed.find("td", id="contentcol")
9090
data_list = restaurant_data_generator(content_col)
9191
for data_div in data_list:
9292
metadata = extract_restaurant_metadata(data_div)
93-
print metadata
93+
print(metadata)

resources/session07/mashup_4.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from bs4 import BeautifulSoup
2+
import pathlib
23
import re
34
import requests
45

@@ -33,18 +34,17 @@ def get_inspection_page(**kwargs):
3334
params[key] = val
3435
resp = requests.get(url, params=params)
3536
resp.raise_for_status()
36-
return resp.content, resp.encoding
37+
return resp.text
3738

3839

39-
def parse_source(html, encoding='utf-8'):
40-
parsed = BeautifulSoup(html, from_encoding=encoding)
40+
def parse_source(html):
41+
parsed = BeautifulSoup(html)
4142
return parsed
4243

4344

4445
def load_inspection_page(name):
45-
with open(name, 'r') as fh:
46-
content = fh.read()
47-
return content, 'utf-8'
46+
file_path = pathlib.Path(name)
47+
return file_path.read_text(encoding='utf8')
4848

4949

5050
def restaurant_data_generator(html):
@@ -60,7 +60,7 @@ def has_two_tds(elem):
6060

6161

6262
def clean_data(td):
63-
return unicode(td.text).strip(" \n:-")
63+
return td.text.strip(" \n:-")
6464

6565

6666
def extract_restaurant_metadata(elem):
@@ -108,9 +108,9 @@ def get_score_data(elem):
108108
if samples:
109109
average = total/float(samples)
110110
data = {
111-
u'Average Score': average,
112-
u'High Score': high_score,
113-
u'Total Inspections': samples
111+
'Average Score': average,
112+
'High Score': high_score,
113+
'Total Inspections': samples
114114
}
115115
return data
116116

@@ -121,13 +121,13 @@ def get_score_data(elem):
121121
'Inspection_End': '2/1/2015',
122122
'Zip_Code': '98101'
123123
}
124-
# html, encoding = get_inspection_page(**use_params)
125-
html, encoding = load_inspection_page('inspection_page.html')
126-
parsed = parse_source(html, encoding)
124+
# html = get_inspection_page(**use_params)
125+
html = load_inspection_page('inspection_page.html')
126+
parsed = parse_source(html)
127127
content_col = parsed.find("td", id="contentcol")
128128
data_list = restaurant_data_generator(content_col)
129129
for data_div in data_list:
130130
metadata = extract_restaurant_metadata(data_div)
131131
inspection_data = get_score_data(data_div)
132132
metadata.update(inspection_data)
133-
print metadata
133+
print(metadata)

resources/session07/mashup_5.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from bs4 import BeautifulSoup
22
import geocoder
33
import json
4+
import pathlib
45
import re
56
import requests
67

@@ -35,18 +36,17 @@ def get_inspection_page(**kwargs):
3536
params[key] = val
3637
resp = requests.get(url, params=params)
3738
resp.raise_for_status()
38-
return resp.content, resp.encoding
39+
return resp.text
3940

4041

41-
def parse_source(html, encoding='utf-8'):
42-
parsed = BeautifulSoup(html, from_encoding=encoding)
42+
def parse_source(html):
43+
parsed = BeautifulSoup(html)
4344
return parsed
4445

4546

4647
def load_inspection_page(name):
47-
with open(name, 'r') as fh:
48-
content = fh.read()
49-
return content, 'utf-8'
48+
file_path = pathlib.Path(name)
49+
return file_path.read_text(encoding='utf8')
5050

5151

5252
def restaurant_data_generator(html):
@@ -62,7 +62,7 @@ def has_two_tds(elem):
6262

6363

6464
def clean_data(td):
65-
return unicode(td.text).strip(" \n:-")
65+
return td.text.strip(" \n:-")
6666

6767

6868
def extract_restaurant_metadata(elem):
@@ -123,9 +123,9 @@ def result_generator(count):
123123
'Inspection_End': '2/1/2015',
124124
'Zip_Code': '98101'
125125
}
126-
# html, encoding = get_inspection_page(**use_params)
127-
html, encoding = load_inspection_page('inspection_page.html')
128-
parsed = parse_source(html, encoding)
126+
# html = get_inspection_page(**use_params)
127+
html = load_inspection_page('inspection_page.html')
128+
parsed = parse_source(html)
129129
content_col = parsed.find("td", id="contentcol")
130130
data_list = restaurant_data_generator(content_col)
131131
for data_div in data_list[:count]:

0 commit comments

Comments
 (0)