From 1be08e188b326b5ad7292238ea0ce45c0ac27b35 Mon Sep 17 00:00:00 2001 From: robalford Date: Sun, 17 Jan 2016 19:41:26 -0800 Subject: [PATCH 01/21] tests passing for http_server. could use some refactoring. --- resources/session02/homework/http_server.py | 27 +++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/resources/session02/homework/http_server.py b/resources/session02/homework/http_server.py index 84ceffe1..86c9d288 100644 --- a/resources/session02/homework/http_server.py +++ b/resources/session02/homework/http_server.py @@ -1,14 +1,16 @@ import socket import sys +import pathlib +import mimetypes -def response_ok(body=b"this is a pretty minimal response", mimetype=b"text/plain"): +def response_ok(body, mimetype): """returns a basic HTTP response""" resp = [] resp.append(b"HTTP/1.1 200 OK") - resp.append(b"Content-Type: text/plain") + resp.append(b"Content-Type: " + mimetype) # couldn't format byte string resp.append(b"") - resp.append(b"this is a pretty minimal response") + resp.append(body) return b"\r\n".join(resp) @@ -22,7 +24,10 @@ def response_method_not_allowed(): def response_not_found(): """returns a 404 Not Found response""" - return b"" + resp = [] + resp.append("HTTP/1.1 404 Not Found") + resp.append("") + return "\r\n".join(resp).encode('utf8') def parse_request(request): @@ -33,9 +38,21 @@ def parse_request(request): return uri +# this is working, move to next step of homework def resolve_uri(uri): """This method should return appropriate content and a mime type""" - return b"still broken", b"text/plain" + path = pathlib.Path('webroot{}'.format(uri)) + if path.exists() and path.is_dir(): + resources = [item.name.encode('utf8') for item in path.iterdir()] + contents = b'\r\n'.join(resources) + mime_type = b'text/plain' + elif path.exists() and path.is_file(): + contents = path.read_bytes() + mime_type = mimetypes.guess_type(uri)[0].encode('utf8') + # mime_type = mimetypes.types_map(uri).encode('utf8') + else: + 
raise NameError('No such file or directory. Please try again.') + return contents, mime_type def server(log_buffer=sys.stderr): From 530e5e206846f4c8b08fd2181965d8fc796a5f6d Mon Sep 17 00:00:00 2001 From: robalford Date: Mon, 18 Jan 2016 14:37:16 -0800 Subject: [PATCH 02/21] refactored http_server. tests passing --- resources/session02/homework/http_server.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/resources/session02/homework/http_server.py b/resources/session02/homework/http_server.py index 86c9d288..2794e1d2 100644 --- a/resources/session02/homework/http_server.py +++ b/resources/session02/homework/http_server.py @@ -8,7 +8,7 @@ def response_ok(body, mimetype): """returns a basic HTTP response""" resp = [] resp.append(b"HTTP/1.1 200 OK") - resp.append(b"Content-Type: " + mimetype) # couldn't format byte string + resp.append(b"Content-Type: " + mimetype) # couldn't call format() on byte string resp.append(b"") resp.append(body) return b"\r\n".join(resp) @@ -38,18 +38,16 @@ def parse_request(request): return uri -# this is working, move to next step of homework def resolve_uri(uri): """This method should return appropriate content and a mime type""" path = pathlib.Path('webroot{}'.format(uri)) - if path.exists() and path.is_dir(): + if path.is_dir(): resources = [item.name.encode('utf8') for item in path.iterdir()] contents = b'\r\n'.join(resources) mime_type = b'text/plain' - elif path.exists() and path.is_file(): + elif path.is_file(): contents = path.read_bytes() mime_type = mimetypes.guess_type(uri)[0].encode('utf8') - # mime_type = mimetypes.types_map(uri).encode('utf8') else: raise NameError('No such file or directory. Please try again.') return contents, mime_type From 92bedc932ad41ff0bef32e55b2b0c3905cc5c477 Mon Sep 17 00:00:00 2001 From: robalford Date: Mon, 25 Jan 2016 11:30:25 -0800 Subject: [PATCH 03/21] calculator working. 
need to write instructions, considering using form instead of url --- resources/session03/calculator/calculator.py | 61 ++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 resources/session03/calculator/calculator.py diff --git a/resources/session03/calculator/calculator.py b/resources/session03/calculator/calculator.py new file mode 100644 index 00000000..33c08a8a --- /dev/null +++ b/resources/session03/calculator/calculator.py @@ -0,0 +1,61 @@ +import re +import operator + + +def instructions(): + instructions = """

Here is how to use my calculator.

""" + return instructions + + +def calculate(num1, op_str, num2): + ops = {'+': operator.add, + '-': operator.sub, + '*': operator.mul, + '/': operator.truediv} + result = ops[op_str](int(num1), int(num2)) + calculation = "

{} {} {} equals {}.

" + return calculation.format(num1, op_str, num2, result) + + +def resolve_path(path): + urls = [(r'^$', instructions), + (r'^([\d]+)(\+|\-|\*|\/)([\d]+)$', calculate)] + matchpath = path.lstrip('/') + for regexp, func in urls: + match = re.match(regexp, matchpath) + if match is None: + continue + args = match.groups([]) + return func, args + # we get here if no url matches + raise NameError + + +def application(environ, start_response): + headers = [("Content-type", "text/html")] + try: + path = environ.get('PATH_INFO', None) + if path is None: + raise NameError + func, args = resolve_path(path) + body = func(*args) + status = "200 OK" + except NameError: + status = "404 Not Found" + body = "

Not Found

" + except ZeroDivisionError: + status = "400 Bad Request" + body = "

You can't divide by zero!

" + except Exception: + status = "500 Internal Server Error" + body = "

Internal Server Error

" + finally: + headers.append(('Content-length', str(len(body)))) + start_response(status, headers) + return [body.encode('utf8')] + + +if __name__ == '__main__': + from wsgiref.simple_server import make_server + srv = make_server('localhost', 8080, application) + srv.serve_forever() From 257c2b64be492c7ebe3ef4372e8b0469d6c839e4 Mon Sep 17 00:00:00 2001 From: robalford Date: Mon, 25 Jan 2016 15:06:29 -0800 Subject: [PATCH 04/21] separated html into functions, added form to calculator. --- resources/session03/calculator/calculator.py | 60 ++++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/resources/session03/calculator/calculator.py b/resources/session03/calculator/calculator.py index 33c08a8a..5e8de5ed 100644 --- a/resources/session03/calculator/calculator.py +++ b/resources/session03/calculator/calculator.py @@ -1,10 +1,41 @@ import re import operator +from urllib.parse import parse_qsl -def instructions(): - instructions = """

Here is how to use my calculator.

""" - return instructions +def header(): + header = """ + +Calculator + + +

Python Does Math For You


""" + return header + + +def footer(): + footer = """ +

+
© pythondoesmathforyou.com
+ +""" + return footer + + +def html_doc(body): + head = header() + foot = footer() + return head + body + foot + + +def calculator(): + calculator = """ +
+ Enter a calculation (e.g. '5+2'):

+

+ +
""" + return calculator def calculate(num1, op_str, num2): @@ -13,12 +44,13 @@ def calculate(num1, op_str, num2): '*': operator.mul, '/': operator.truediv} result = ops[op_str](int(num1), int(num2)) - calculation = "

{} {} {} equals {}.

" + calculation = """

{} {} {} equals {}



+ Make another calculation.""" return calculation.format(num1, op_str, num2, result) def resolve_path(path): - urls = [(r'^$', instructions), + urls = [(r'^$', calculator), (r'^([\d]+)(\+|\-|\*|\/)([\d]+)$', calculate)] matchpath = path.lstrip('/') for regexp, func in urls: @@ -35,24 +67,32 @@ def application(environ, start_response): headers = [("Content-type", "text/html")] try: path = environ.get('PATH_INFO', None) + qs = environ.get('QUERY_STRING', None) if path is None: raise NameError + if qs: + qsl = parse_qsl(qs) # urllib function to convert query string to list + path = qsl[0][1].replace(' ', '') # grab the calculation value, strip whitespace and store it in path func, args = resolve_path(path) body = func(*args) status = "200 OK" except NameError: status = "404 Not Found" - body = "

Not Found

" + body = """

Not Found

+ Make another calculation.""" # DRY except ZeroDivisionError: status = "400 Bad Request" - body = "

You can't divide by zero!

" + body = """

You can't divide by zero!

+ Make another calculation.""" except Exception: status = "500 Internal Server Error" - body = "

Internal Server Error

" + body = """

Internal Server Error

+ Make another calculation.""" finally: - headers.append(('Content-length', str(len(body)))) + html = html_doc(body) + headers.append(('Content-length', str(len(html)))) start_response(status, headers) - return [body.encode('utf8')] + return [html.encode('utf8')] if __name__ == '__main__': From 24485e1f3237c1457b2061d05a5b91fb299d5d93 Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 26 Jan 2016 10:11:28 -0800 Subject: [PATCH 05/21] working version of calculator.py --- resources/session03/calculator/calculator.py | 34 ++++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/resources/session03/calculator/calculator.py b/resources/session03/calculator/calculator.py index 5e8de5ed..bb6f0051 100644 --- a/resources/session03/calculator/calculator.py +++ b/resources/session03/calculator/calculator.py @@ -22,12 +22,6 @@ def footer(): return footer -def html_doc(body): - head = header() - foot = footer() - return head + body + foot - - def calculator(): calculator = """
@@ -49,6 +43,12 @@ def calculate(num1, op_str, num2): return calculation.format(num1, op_str, num2, result) +def html_doc(doc_body): + doc_header = header() + doc_footer = footer() + return doc_header + doc_body + doc_footer + + def resolve_path(path): urls = [(r'^$', calculator), (r'^([\d]+)(\+|\-|\*|\/)([\d]+)$', calculate)] @@ -66,28 +66,28 @@ def resolve_path(path): def application(environ, start_response): headers = [("Content-type", "text/html")] try: - path = environ.get('PATH_INFO', None) + request = environ.get('PATH_INFO', None) qs = environ.get('QUERY_STRING', None) - if path is None: + if request is None: raise NameError if qs: - qsl = parse_qsl(qs) # urllib function to convert query string to list - path = qsl[0][1].replace(' ', '') # grab the calculation value, strip whitespace and store it in path - func, args = resolve_path(path) + qsl = parse_qsl(qs) # urllib function to convert query string to list of keys and values + request = qsl[0][1].replace(' ', '') # grab the calculation from the query string, strip whitespace and store it in request + func, args = resolve_path(request) body = func(*args) status = "200 OK" except NameError: - status = "404 Not Found" - body = """

Not Found

- Make another calculation.""" # DRY + status = "400 Bad Request" + body = """

Please re-enter your calculation using only digits and the following operands: +, -, *, /. Thanks!

+ Try another calculation.""" except ZeroDivisionError: status = "400 Bad Request" body = """

You can't divide by zero!

- Make another calculation.""" + Let's try something more reasonable.""" except Exception: status = "500 Internal Server Error" - body = """

Internal Server Error

- Make another calculation.""" + body = """

Something bad has happened, but it's not your fault. Sorry.

+ Give us another chance.""" finally: html = html_doc(body) headers.append(('Content-length', str(len(html)))) From c9ff406b48939be9185244f21a53b461a5da34ec Mon Sep 17 00:00:00 2001 From: robalford Date: Fri, 29 Jan 2016 15:31:00 -0800 Subject: [PATCH 06/21] mashup.py initial code from class. added a couple comments. no changes to the code. --- resources/session04/soup/mashup.py | 164 +++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 resources/session04/soup/mashup.py diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py new file mode 100644 index 00000000..b3d6fe5e --- /dev/null +++ b/resources/session04/soup/mashup.py @@ -0,0 +1,164 @@ +from bs4 import BeautifulSoup +import geocoder +import json +import pathlib +import re +import requests + + +INSPECTION_DOMAIN = 'http://info.kingcounty.gov/' +INSPECTION_PATH = '/health/ehs/foodsafety/inspections/Results.aspx' +INSPECTION_PARAMS = { + 'Output': 'W', + 'Business_Name': '', + 'Business_Address': '', + 'Longitude': '', + 'Latitude': '', + 'City': '', + 'Zip_Code': '', + 'Inspection_Type': 'All', + 'Inspection_Start': '', + 'Inspection_End': '', + 'Inspection_Closed_Business': 'A', + 'Violation_Points': '', + 'Violation_Red_Points': '', + 'Violation_Descr': '', + 'Fuzzy_Search': 'N', + 'Sort': 'H' +} + + +def get_inspection_page(**kwargs): + url = INSPECTION_DOMAIN + INSPECTION_PATH + params = INSPECTION_PARAMS.copy() + for key, val in kwargs.items(): + if key in INSPECTION_PARAMS: + params[key] = val + resp = requests.get(url, params=params) + resp.raise_for_status() # raise python exception based on http status + return resp.text + + +def parse_source(html): + parsed = BeautifulSoup(html) + return parsed + + +def load_inspection_page(name): + file_path = pathlib.Path(name) + return file_path.read_text(encoding='utf8') + + +def restaurant_data_generator(html): + id_finder = re.compile(r'PR[\d]+~') + return html.find_all('div', id=id_finder) + + 
+def has_two_tds(elem): + is_tr = elem.name == 'tr' + td_children = elem.find_all('td', recursive=False) + has_two = len(td_children) == 2 + return is_tr and has_two + + +def clean_data(td): + return td.text.strip(" \n:-") + + +def extract_restaurant_metadata(elem): + restaurant_data_rows = elem.find('tbody').find_all( + has_two_tds, recursive=False + ) + rdata = {} + current_label = '' + for data_row in restaurant_data_rows: + key_cell, val_cell = data_row.find_all('td', recursive=False) + new_label = clean_data(key_cell) + current_label = new_label if new_label else current_label + rdata.setdefault(current_label, []).append(clean_data(val_cell)) + return rdata + + +def is_inspection_data_row(elem): + is_tr = elem.name == 'tr' + if not is_tr: + return False + td_children = elem.find_all('td', recursive=False) + has_four = len(td_children) == 4 + this_text = clean_data(td_children[0]).lower() + contains_word = 'inspection' in this_text + does_not_start = not this_text.startswith('inspection') + return is_tr and has_four and contains_word and does_not_start + + +def get_score_data(elem): + inspection_rows = elem.find_all(is_inspection_data_row) + samples = len(inspection_rows) + total = 0 + high_score = 0 + average = 0 + for row in inspection_rows: + strval = clean_data(row.find_all('td')[2]) + try: + intval = int(strval) + except (ValueError, TypeError): + samples -= 1 + else: + total += intval + high_score = intval if intval > high_score else high_score + + if samples: + average = total/float(samples) + data = { + u'Average Score': average, + u'High Score': high_score, + u'Total Inspections': samples + } + return data + + +def result_generator(count): + use_params = { + 'Inspection_Start': '2/1/2013', + 'Inspection_End': '2/1/2015', + 'Zip_Code': '98101' + } + # html = get_inspection_page(**use_params) + html = load_inspection_page('inspection_page.html') + parsed = parse_source(html) + content_col = parsed.find("td", id="contentcol") + data_list = 
restaurant_data_generator(content_col) + for data_div in data_list[:count]: + metadata = extract_restaurant_metadata(data_div) + inspection_data = get_score_data(data_div) + metadata.update(inspection_data) + yield metadata + + +def get_geojson(result): + address = " ".join(result.get('Address', '')) + if not address: + return None + geocoded = geocoder.google(address) + geojson = geocoded.geojson + inspection_data = {} + use_keys = ( + 'Business Name', 'Average Score', 'Total Inspections', 'High Score' + ) + for key, val in result.items(): + if key not in use_keys: + continue + if isinstance(val, list): + val = " ".join(val) + inspection_data[key] = val + geojson['properties'] = inspection_data + return geojson + + +if __name__ == '__main__': + total_result = {'type': 'FeatureCollection', 'features': []} + for result in result_generator(10): + geojson = get_geojson(result) + total_result['features'].append(geojson) + with open('my_map.json', 'w') as fh: + json.dump(total_result, fh) From aea8f31bc67016b652e35942198db6a05fc2ac97 Mon Sep 17 00:00:00 2001 From: robalford Date: Fri, 29 Jan 2016 16:08:05 -0800 Subject: [PATCH 07/21] added sorting function to sort by average score --- resources/session04/soup/mashup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index b3d6fe5e..dd642981 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -160,5 +160,10 @@ def get_geojson(result): for result in result_generator(10): geojson = get_geojson(result) total_result['features'].append(geojson) - with open('my_map.json', 'w') as fh: - json.dump(total_result, fh) + # sort results by average score, from highest to lowest + sorted_result = sorted(total_result['features'], + key=lambda k: k['properties']['Average Score'], + reverse=True) + import pdb; pdb.set_trace() + # with open('my_map.json', 'w') as fh: + # json.dump(total_result, fh) From 
8f57df4d5282d614aba96ca3a9f79f9a7f5468ce Mon Sep 17 00:00:00 2001 From: robalford Date: Fri, 29 Jan 2016 16:28:20 -0800 Subject: [PATCH 08/21] added very basic sorting by high score, avg score, or most inspections via sys.argv list. needs serious refactoring --- resources/session04/soup/mashup.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index dd642981..f24cc1ae 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -1,3 +1,5 @@ +import sys +from pprint import pprint from bs4 import BeautifulSoup import geocoder import json @@ -161,9 +163,18 @@ def get_geojson(result): geojson = get_geojson(result) total_result['features'].append(geojson) # sort results by average score, from highest to lowest - sorted_result = sorted(total_result['features'], - key=lambda k: k['properties']['Average Score'], - reverse=True) - import pdb; pdb.set_trace() + if sys.argv[1] == 'highscore': + sorted_result = sorted(total_result['features'], + key=lambda k: k['properties']['High Score'], + reverse=True) + elif sys.argv[1] == 'average': + sorted_result = sorted(total_result['features'], + key=lambda k: k['properties']['Average Score'], + reverse=True) + elif sys.argv[1] == 'most': + sorted_result = sorted(total_result['features'], + key=lambda k: k['properties']['Total Inspections'], + reverse=True) + pprint(sorted_result) # with open('my_map.json', 'w') as fh: # json.dump(total_result, fh) From e56994b687f7a9a1317edf440680e2fbe3f564bd Mon Sep 17 00:00:00 2001 From: robalford Date: Fri, 29 Jan 2016 17:13:33 -0800 Subject: [PATCH 09/21] added command line argument for number of results. explore argparse module to clean up command line interface. 
--- resources/session04/soup/mashup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index f24cc1ae..1ead000f 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -156,13 +156,15 @@ def get_geojson(result): geojson['properties'] = inspection_data return geojson - if __name__ == '__main__': total_result = {'type': 'FeatureCollection', 'features': []} - for result in result_generator(10): + # allow user to select number of results. use argparse to clean this up? + num_results = int(sys.argv[2]) if sys.argv[2] else 10 + for result in result_generator(num_results): geojson = get_geojson(result) total_result['features'].append(geojson) - # sort results by average score, from highest to lowest + # sort results by average score, high score or most inspections. + # explore argparse to make this all work better, with instructions, exceptions etc. if sys.argv[1] == 'highscore': sorted_result = sorted(total_result['features'], key=lambda k: k['properties']['High Score'], From 277303670055df75bfea2af067a0cfdf787b9a65 Mon Sep 17 00:00:00 2001 From: robalford Date: Sat, 30 Jan 2016 10:39:40 -0800 Subject: [PATCH 10/21] refactored sorting and ordering processes into their own functions. 
--- resources/session04/soup/mashup.py | 56 +++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index 1ead000f..a4b21857 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -156,27 +156,51 @@ def get_geojson(result): geojson['properties'] = inspection_data return geojson + +# functions for dealing with command line args +def sort_order(args): + if 'reverse' not in args: + return True + else: + return False + + +def sort_by(args): + sort_by = { + 'highscore': 'High Score', + 'averagescore': 'Average Score', + 'mostinspections': 'Total Inspections', + } + for order in sort_by: + if order in args: + return sort_by[order] + + +def get_count(args): + for arg in args: + try: + arg = int(arg) + return arg + except ValueError: + continue + return 10 + + if __name__ == '__main__': total_result = {'type': 'FeatureCollection', 'features': []} - # allow user to select number of results. use argparse to clean this up? - num_results = int(sys.argv[2]) if sys.argv[2] else 10 - for result in result_generator(num_results): + # get command line arguments for sorting, limiting and ordering results + args = sys.argv[1:] + count = get_count(args) + sort_by = sort_by(args) + sort_order = sort_order(args) + for result in result_generator(count): geojson = get_geojson(result) total_result['features'].append(geojson) - # sort results by average score, high score or most inspections. - # explore argparse to make this all work better, with instructions, exceptions etc. 
- if sys.argv[1] == 'highscore': - sorted_result = sorted(total_result['features'], - key=lambda k: k['properties']['High Score'], - reverse=True) - elif sys.argv[1] == 'average': - sorted_result = sorted(total_result['features'], - key=lambda k: k['properties']['Average Score'], - reverse=True) - elif sys.argv[1] == 'most': + # sort and order results based on command line args + if sort_by: sorted_result = sorted(total_result['features'], - key=lambda k: k['properties']['Total Inspections'], - reverse=True) + key=lambda k: k['properties'][sort_by], + reverse=sort_order) pprint(sorted_result) # with open('my_map.json', 'w') as fh: # json.dump(total_result, fh) From e9055ad5c1e86077c134b3104b0973341e90fdaf Mon Sep 17 00:00:00 2001 From: robalford Date: Sat, 30 Jan 2016 11:01:36 -0800 Subject: [PATCH 11/21] fixed bug in my code for generating json file. sort only the features dict of total result and return the entire result. seems to be working now. --- resources/session04/soup/mashup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index a4b21857..bf7128a6 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -198,9 +198,9 @@ def get_count(args): total_result['features'].append(geojson) # sort and order results based on command line args if sort_by: - sorted_result = sorted(total_result['features'], - key=lambda k: k['properties'][sort_by], - reverse=sort_order) - pprint(sorted_result) - # with open('my_map.json', 'w') as fh: - # json.dump(total_result, fh) + total_result['features'] = sorted(total_result['features'], + key=lambda k: k['properties'][sort_by], + reverse=sort_order) + # pprint(total_result) + with open('my_map.json', 'w') as fh: + json.dump(total_result, fh) From 2f539a2413d13b727abb3923b8691af1182f12aa Mon Sep 17 00:00:00 2001 From: robalford Date: Sat, 30 Jan 2016 14:25:12 -0800 Subject: [PATCH 12/21] 
refactored result_generator to sort and order restaurant results before yielding individual records. --- resources/session04/soup/mashup.py | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index bf7128a6..fd6b508a 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -119,7 +119,7 @@ def get_score_data(elem): return data -def result_generator(count): +def result_generator(count, sort_by, sort_order): use_params = { 'Inspection_Start': '2/1/2013', 'Inspection_End': '2/1/2015', @@ -130,11 +130,19 @@ def result_generator(count): parsed = parse_source(html) content_col = parsed.find("td", id="contentcol") data_list = restaurant_data_generator(content_col) - for data_div in data_list[:count]: + restaurant_list = [] + # for data_div in data_list[:count]: + for data_div in data_list: metadata = extract_restaurant_metadata(data_div) inspection_data = get_score_data(data_div) metadata.update(inspection_data) - yield metadata + restaurant_list.append(metadata) + if sort_by: + restaurant_list = sorted(restaurant_list, + key=lambda k: k[sort_by], + reverse=sort_order) + for restaurant in restaurant_list[:count]: + yield restaurant def get_geojson(result): @@ -189,18 +197,19 @@ def get_count(args): if __name__ == '__main__': total_result = {'type': 'FeatureCollection', 'features': []} # get command line arguments for sorting, limiting and ordering results + # explore argparse or click for better command line interface args = sys.argv[1:] count = get_count(args) sort_by = sort_by(args) sort_order = sort_order(args) - for result in result_generator(count): + for result in result_generator(count, sort_by, sort_order): geojson = get_geojson(result) total_result['features'].append(geojson) # sort and order results based on command line args - if sort_by: - total_result['features'] = sorted(total_result['features'], - 
key=lambda k: k['properties'][sort_by], - reverse=sort_order) - # pprint(total_result) - with open('my_map.json', 'w') as fh: - json.dump(total_result, fh) + # if sort_by: + # total_result['features'] = sorted(total_result['features'], + # key=lambda k: k['properties'][sort_by], + # reverse=sort_order) + pprint(total_result) + # with open('my_map.json', 'w') as fh: + # json.dump(total_result, fh) From 569a3fde63d2a5c8b1f548d0a4d394dfd9bc3b42 Mon Sep 17 00:00:00 2001 From: robalford Date: Sun, 31 Jan 2016 13:26:46 -0800 Subject: [PATCH 13/21] marker-color property set based on average score. need to add other sorting options. refine color coding. --- resources/session04/soup/mashup.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index fd6b508a..e9db7391 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -119,6 +119,19 @@ def get_score_data(elem): return data +def set_marker_color(inspection_data): + # set marker color based on average score. 
+ # green = go, yellow = proceed with caution, red = stop + # need to make this work for other sorting values too + if inspection_data['Average Score'] >= 50: + inspection_data['marker-color'] = '#00ff00' + elif 50 >= inspection_data['Average Score'] >= 35: + inspection_data['marker-color'] = '#ffff00' + elif inspection_data['Average Score'] <= 35: + inspection_data['marker-color'] = '#ff0000' + return inspection_data + + def result_generator(count, sort_by, sort_order): use_params = { 'Inspection_Start': '2/1/2013', @@ -153,7 +166,11 @@ def get_geojson(result): geojson = geocoded.geojson inspection_data = {} use_keys = ( - 'Business Name', 'Average Score', 'Total Inspections', 'High Score' + 'marker-color', + 'Business Name', + 'Average Score', + 'Total Inspections', + 'High Score' ) for key, val in result.items(): if key not in use_keys: @@ -161,6 +178,9 @@ def get_geojson(result): if isinstance(val, list): val = " ".join(val) inspection_data[key] = val + # use your function above to set marker color property based on avg. score + # adjust to use other sorting criteria + inspection_data = set_marker_color(inspection_data) geojson['properties'] = inspection_data return geojson @@ -210,6 +230,6 @@ def get_count(args): # total_result['features'] = sorted(total_result['features'], # key=lambda k: k['properties'][sort_by], # reverse=sort_order) - pprint(total_result) - # with open('my_map.json', 'w') as fh: - # json.dump(total_result, fh) + # pprint(total_result) + with open('my_map.json', 'w') as fh: + json.dump(total_result, fh) From d216d51b810f432bfed4d31fd3b7299511c15b57 Mon Sep 17 00:00:00 2001 From: robalford Date: Sun, 31 Jan 2016 14:22:28 -0800 Subject: [PATCH 14/21] revised set_marker_color function to use various sorting criteria. still needs some refining. 
--- resources/session04/soup/mashup.py | 35 +++++++++++++----------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index e9db7391..775aca45 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -119,19 +119,6 @@ def get_score_data(elem): return data -def set_marker_color(inspection_data): - # set marker color based on average score. - # green = go, yellow = proceed with caution, red = stop - # need to make this work for other sorting values too - if inspection_data['Average Score'] >= 50: - inspection_data['marker-color'] = '#00ff00' - elif 50 >= inspection_data['Average Score'] >= 35: - inspection_data['marker-color'] = '#ffff00' - elif inspection_data['Average Score'] <= 35: - inspection_data['marker-color'] = '#ff0000' - return inspection_data - - def result_generator(count, sort_by, sort_order): use_params = { 'Inspection_Start': '2/1/2013', @@ -180,7 +167,7 @@ def get_geojson(result): inspection_data[key] = val # use your function above to set marker color property based on avg. 
score # adjust to use other sorting criteria - inspection_data = set_marker_color(inspection_data) + # inspection_data = set_marker_color(inspection_data) geojson['properties'] = inspection_data return geojson @@ -214,6 +201,18 @@ def get_count(args): return 10 +def set_marker_color(sort_by, results): + # calculate the average score for this sample size and sorting criteria + # need to find average of result set for more useful color coding + for result in results: + if result['properties'][sort_by] >= 66: + result['properties']['marker-color'] = '#00ff00' + elif result['properties'][sort_by] <= 33: + result['properties']['marker-color'] = '#ffff00' + else: + result['properties']['marker-color'] = '#ff0000' + return results + if __name__ == '__main__': total_result = {'type': 'FeatureCollection', 'features': []} # get command line arguments for sorting, limiting and ordering results @@ -225,11 +224,7 @@ def get_count(args): for result in result_generator(count, sort_by, sort_order): geojson = get_geojson(result) total_result['features'].append(geojson) - # sort and order results based on command line args - # if sort_by: - # total_result['features'] = sorted(total_result['features'], - # key=lambda k: k['properties'][sort_by], - # reverse=sort_order) - # pprint(total_result) + # set marker-color property for result set based on sorting criteria + total_result['features'] = set_marker_color(sort_by, total_result['features']) with open('my_map.json', 'w') as fh: json.dump(total_result, fh) From 52d583838eee6ad639b9b8dd16439c500f2d48b6 Mon Sep 17 00:00:00 2001 From: robalford Date: Mon, 1 Feb 2016 10:54:06 -0800 Subject: [PATCH 15/21] reworked set_marker_color() --- resources/session04/soup/mashup.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index 775aca45..a874a940 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ 
-131,7 +131,6 @@ def result_generator(count, sort_by, sort_order): content_col = parsed.find("td", id="contentcol") data_list = restaurant_data_generator(content_col) restaurant_list = [] - # for data_div in data_list[:count]: for data_div in data_list: metadata = extract_restaurant_metadata(data_div) inspection_data = get_score_data(data_div) @@ -165,9 +164,6 @@ def get_geojson(result): if isinstance(val, list): val = " ".join(val) inspection_data[key] = val - # use your function above to set marker color property based on avg. score - # adjust to use other sorting criteria - # inspection_data = set_marker_color(inspection_data) geojson['properties'] = inspection_data return geojson @@ -203,14 +199,17 @@ def get_count(args): def set_marker_color(sort_by, results): # calculate the average score for this sample size and sorting criteria - # need to find average of result set for more useful color coding + scores = [result['properties'][sort_by] for result in results] + avg_score = sum(scores)/len(scores) + # set marker-color property for results based on relationship to avg. score + # green=go, yellow=proceed with caution, red=stop for result in results: - if result['properties'][sort_by] >= 66: + if result['properties'][sort_by] >= (avg_score+5): result['properties']['marker-color'] = '#00ff00' - elif result['properties'][sort_by] <= 33: - result['properties']['marker-color'] = '#ffff00' - else: + elif result['properties'][sort_by] <= (avg_score-5): result['properties']['marker-color'] = '#ff0000' + else: + result['properties']['marker-color'] = '#ffff00' return results if __name__ == '__main__': From 4b32a02bc14f9a59c8269908b51c5c31e58567f2 Mon Sep 17 00:00:00 2001 From: robalford Date: Mon, 1 Feb 2016 15:27:35 -0800 Subject: [PATCH 16/21] began to convert command line interface and argument handling to use the click package. working version, may still have some bugs. 
--- resources/session04/soup/mashup.py | 96 +++++++++++++++++++----------- 1 file changed, 61 insertions(+), 35 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index a874a940..aa4b2128 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -1,11 +1,12 @@ import sys -from pprint import pprint +from pprint import pprint # used for debugging from bs4 import BeautifulSoup import geocoder import json import pathlib import re import requests +import click INSPECTION_DOMAIN = 'http://info.kingcounty.gov/' @@ -119,7 +120,7 @@ def get_score_data(elem): return data -def result_generator(count, sort_by, sort_order): +def result_generator(sort_by, low_to_high, count): use_params = { 'Inspection_Start': '2/1/2013', 'Inspection_End': '2/1/2015', @@ -139,7 +140,7 @@ def result_generator(count, sort_by, sort_order): if sort_by: restaurant_list = sorted(restaurant_list, key=lambda k: k[sort_by], - reverse=sort_order) + reverse=low_to_high) for restaurant in restaurant_list[:count]: yield restaurant @@ -169,32 +170,32 @@ def get_geojson(result): # functions for dealing with command line args -def sort_order(args): - if 'reverse' not in args: - return True - else: - return False - - -def sort_by(args): - sort_by = { - 'highscore': 'High Score', - 'averagescore': 'Average Score', - 'mostinspections': 'Total Inspections', - } - for order in sort_by: - if order in args: - return sort_by[order] - - -def get_count(args): - for arg in args: - try: - arg = int(arg) - return arg - except ValueError: - continue - return 10 +# def sort_order(args): +# if 'reverse' not in args: +# return True +# else: +# return False + + +# def sorter(args): +# sort_by = { +# 'highscore': 'High Score', +# 'averagescore': 'Average Score', +# 'mostinspections': 'Total Inspections', +# } +# for order in sort_by: +# if order in args: +# return sort_by[order] + + +# def get_count(args): +# for arg in args: +# try: +# arg 
= int(arg) +# return arg +# except ValueError: +# continue +# return 10 def set_marker_color(sort_by, results): @@ -212,18 +213,43 @@ def set_marker_color(sort_by, results): result['properties']['marker-color'] = '#ffff00' return results -if __name__ == '__main__': + +@click.command() +@click.option('--sort-by', + type=click.Choice(['Average Score', 'High Score', 'Total Inspections']), + prompt=True, + help='Sorting options: averagescore, highscore, mostinspections') +@click.option('--low-to-high', is_flag=True, default=True) +@click.option('--count', default=10, prompt=True) +def save_results(sort_by, low_to_high, count): total_result = {'type': 'FeatureCollection', 'features': []} # get command line arguments for sorting, limiting and ordering results # explore argparse or click for better command line interface - args = sys.argv[1:] - count = get_count(args) - sort_by = sort_by(args) - sort_order = sort_order(args) - for result in result_generator(count, sort_by, sort_order): + # args = sys.argv[1:] + # count = get_count(args) + # sort_by = sorter(sort_by) + # sort_order = sort_order(args) + for result in result_generator(sort_by, low_to_high, count): geojson = get_geojson(result) total_result['features'].append(geojson) # set marker-color property for result set based on sorting criteria total_result['features'] = set_marker_color(sort_by, total_result['features']) with open('my_map.json', 'w') as fh: json.dump(total_result, fh) + +if __name__ == '__main__': + save_results() + # total_result = {'type': 'FeatureCollection', 'features': []} + # # get command line arguments for sorting, limiting and ordering results + # # explore argparse or click for better command line interface + # args = sys.argv[1:] + # count = get_count(args) + # sort_by = sort_by(args) + # sort_order = sort_order(args) + # for result in result_generator(count, sort_by, sort_order): + # geojson = get_geojson(result) + # total_result['features'].append(geojson) + # # set marker-color property 
for result set based on sorting criteria + # total_result['features'] = set_marker_color(sort_by, total_result['features']) + # with open('my_map.json', 'w') as fh: + # json.dump(total_result, fh) From 3c90042f0906d569eb42d730aa59d7742427639e Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 2 Feb 2016 08:54:37 -0800 Subject: [PATCH 17/21] worked on command line interface using click --- resources/session04/soup/mashup.py | 71 ++++++------------------------ 1 file changed, 14 insertions(+), 57 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index aa4b2128..a899541e 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -7,6 +7,8 @@ import re import requests import click +import webbrowser +import urllib.parse INSPECTION_DOMAIN = 'http://info.kingcounty.gov/' @@ -120,7 +122,7 @@ def get_score_data(elem): return data -def result_generator(sort_by, low_to_high, count): +def result_generator(sort_by, high_to_low, count): use_params = { 'Inspection_Start': '2/1/2013', 'Inspection_End': '2/1/2015', @@ -140,7 +142,7 @@ def result_generator(sort_by, low_to_high, count): if sort_by: restaurant_list = sorted(restaurant_list, key=lambda k: k[sort_by], - reverse=low_to_high) + reverse=high_to_low) for restaurant in restaurant_list[:count]: yield restaurant @@ -169,35 +171,6 @@ def get_geojson(result): return geojson -# functions for dealing with command line args -# def sort_order(args): -# if 'reverse' not in args: -# return True -# else: -# return False - - -# def sorter(args): -# sort_by = { -# 'highscore': 'High Score', -# 'averagescore': 'Average Score', -# 'mostinspections': 'Total Inspections', -# } -# for order in sort_by: -# if order in args: -# return sort_by[order] - - -# def get_count(args): -# for arg in args: -# try: -# arg = int(arg) -# return arg -# except ValueError: -# continue -# return 10 - - def set_marker_color(sort_by, results): # calculate the average score 
for this sample size and sorting criteria scores = [result['properties'][sort_by] for result in results] @@ -218,38 +191,22 @@ def set_marker_color(sort_by, results): @click.option('--sort-by', type=click.Choice(['Average Score', 'High Score', 'Total Inspections']), prompt=True, - help='Sorting options: averagescore, highscore, mostinspections') -@click.option('--low-to-high', is_flag=True, default=True) -@click.option('--count', default=10, prompt=True) -def save_results(sort_by, low_to_high, count): + help='Sorting options: Average Score, High Score, Total Inspections') +@click.option('--high-to-low/--low-to-high', + default=True, + help='Select sort order.') +@click.option('--count', + default=10, + prompt=True, + help='Select number of results.') +def save_results(sort_by, high_to_low, count): total_result = {'type': 'FeatureCollection', 'features': []} - # get command line arguments for sorting, limiting and ordering results - # explore argparse or click for better command line interface - # args = sys.argv[1:] - # count = get_count(args) - # sort_by = sorter(sort_by) - # sort_order = sort_order(args) - for result in result_generator(sort_by, low_to_high, count): + for result in result_generator(sort_by, high_to_low, count): geojson = get_geojson(result) total_result['features'].append(geojson) - # set marker-color property for result set based on sorting criteria total_result['features'] = set_marker_color(sort_by, total_result['features']) with open('my_map.json', 'w') as fh: json.dump(total_result, fh) if __name__ == '__main__': save_results() - # total_result = {'type': 'FeatureCollection', 'features': []} - # # get command line arguments for sorting, limiting and ordering results - # # explore argparse or click for better command line interface - # args = sys.argv[1:] - # count = get_count(args) - # sort_by = sort_by(args) - # sort_order = sort_order(args) - # for result in result_generator(count, sort_by, sort_order): - # geojson = get_geojson(result) - 
# total_result['features'].append(geojson) - # # set marker-color property for result set based on sorting criteria - # total_result['features'] = set_marker_color(sort_by, total_result['features']) - # with open('my_map.json', 'w') as fh: - # json.dump(total_result, fh) From 8dd90f2e8376cd0203c92a23d3ed36636a323039 Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 2 Feb 2016 10:18:25 -0800 Subject: [PATCH 18/21] got map to open when program runs --- resources/session04/soup/mashup.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index a899541e..55294d3f 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -205,8 +205,13 @@ def save_results(sort_by, high_to_low, count): geojson = get_geojson(result) total_result['features'].append(geojson) total_result['features'] = set_marker_color(sort_by, total_result['features']) - with open('my_map.json', 'w') as fh: - json.dump(total_result, fh) + # open map in geojson.io with your data + map_url = 'http://geojson.io/#data=data:application/json,' + escaped_geojson = urllib.parse.quote(json.dumps(total_result)) # use json to convert dict to string for escaping + geojson_url = map_url + escaped_geojson + webbrowser.open(geojson_url) + # with open('my_map.json', 'w') as fh: + # json.dump(total_result, fh) if __name__ == '__main__': save_results() From bcd01f4887e4018c72d46aa79f8c4d16df477ab9 Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 2 Feb 2016 10:55:12 -0800 Subject: [PATCH 19/21] moved open_map into its own function --- resources/session04/soup/mashup.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index 55294d3f..203b7298 100644 --- a/resources/session04/soup/mashup.py +++ b/resources/session04/soup/mashup.py @@ -187,6 +187,14 @@ def 
set_marker_color(sort_by, results): return results +def open_map(total_result): + # open map in geojson.io with your data + map_url = 'http://geojson.io/#data=data:application/json,' + escaped_geojson = urllib.parse.quote(json.dumps(total_result)) # use json to convert dict to string for escaping + geojson_url = map_url + escaped_geojson + webbrowser.open(geojson_url) + + @click.command() @click.option('--sort-by', type=click.Choice(['Average Score', 'High Score', 'Total Inspections']), @@ -199,19 +207,15 @@ def set_marker_color(sort_by, results): default=10, prompt=True, help='Select number of results.') -def save_results(sort_by, high_to_low, count): +def display_results(sort_by, high_to_low, count): total_result = {'type': 'FeatureCollection', 'features': []} for result in result_generator(sort_by, high_to_low, count): geojson = get_geojson(result) total_result['features'].append(geojson) total_result['features'] = set_marker_color(sort_by, total_result['features']) - # open map in geojson.io with your data - map_url = 'http://geojson.io/#data=data:application/json,' - escaped_geojson = urllib.parse.quote(json.dumps(total_result)) # use json to convert dict to string for escaping - geojson_url = map_url + escaped_geojson - webbrowser.open(geojson_url) # with open('my_map.json', 'w') as fh: # json.dump(total_result, fh) + open_map(total_result) if __name__ == '__main__': - save_results() + display_results() From 14845e6e0ce984caca8f8aae127c5edeb38aa351 Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 2 Feb 2016 12:25:48 -0800 Subject: [PATCH 20/21] wrote tests for mashup --- resources/session04/soup/mashup.py | 1 + resources/session04/soup/test_mashup.py | 44 +++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 resources/session04/soup/test_mashup.py diff --git a/resources/session04/soup/mashup.py b/resources/session04/soup/mashup.py index 203b7298..ec4fd350 100644 --- a/resources/session04/soup/mashup.py +++ 
b/resources/session04/soup/mashup.py @@ -208,6 +208,7 @@ def open_map(total_result): prompt=True, help='Select number of results.') def display_results(sort_by, high_to_low, count): + """A program to display the results of restaurant health inspections on a map.""" total_result = {'type': 'FeatureCollection', 'features': []} for result in result_generator(sort_by, high_to_low, count): geojson = get_geojson(result) diff --git a/resources/session04/soup/test_mashup.py b/resources/session04/soup/test_mashup.py new file mode 100644 index 00000000..2816522e --- /dev/null +++ b/resources/session04/soup/test_mashup.py @@ -0,0 +1,44 @@ +import pytest + +from mashup import result_generator, get_geojson, set_marker_color + + +def test_result_generator(): + result_list = [] + for result in result_generator('Average Score', True, 5): + result_list.append(result) + for i in range(len(result_list)-1): + assert result_list[i]['Average Score'] >= result_list[i+1]['Average Score'] + assert len(result_list) == 5 + result_list = [] + for result in result_generator('High Score', False, 5): + result_list.append(result) + for i in range(len(result_list)-1): + assert result_list[i]['High Score'] <= result_list[i+1]['High Score'] + assert len(result_list) == 5 + result_list = [] + for result in result_generator('Total Inspections', True, 1): + result_list.append(result) + assert len(result_list) == 1 + + +def test_set_marker_color(): + # generate geojson result set + total_result = {'type': 'FeatureCollection', 'features': []} + for result in result_generator('Average Score', True, 10): + geojson = get_geojson(result) + total_result['features'].append(geojson) + total_result['features'] = set_marker_color('Average Score', total_result['features']) + # calculate avg. 
score of result set + scores = [result['properties']['Average Score'] for result in total_result['features']] + avg_score = sum(scores)/len(scores) + # assert color values set according to score + for i in range(len(total_result)-1): + if total_result['features'][i]['properties']['Average Score'] >= (avg_score+5): + assert total_result['features'][i]['properties']['marker-color'] == '#00ff00' + elif total_result['features'][i]['properties']['Average Score'] <= (avg_score-5): + assert total_result['features'][i]['properties']['marker-color'] == '#ff0000' + else: + assert total_result['features'][i]['properties']['marker-color'] == '#ffff00' + + From e01bdb72cda8c213c6b2e53b5793e0a58cbb7f27 Mon Sep 17 00:00:00 2001 From: robalford Date: Tue, 2 Feb 2016 12:47:54 -0800 Subject: [PATCH 21/21] added a readme file with instructions for running the program --- resources/session04/soup/README.md | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 resources/session04/soup/README.md diff --git a/resources/session04/soup/README.md b/resources/session04/soup/README.md new file mode 100644 index 00000000..382440f1 --- /dev/null +++ b/resources/session04/soup/README.md @@ -0,0 +1,29 @@ +MASHUP.PY +========= + +Instructions: +------------- + +A program to view restaurant health inspection data for an admittedly +limited geographical area of Seattle via a color-coded map. + +Type `python mashup.py` from the command line to run the program. + +The first prompt will ask for sorting criteria. You may choose from +the following selections: + +* Average Score +* High Score +* Total Inspections + +The second prompt will ask for a count value to determine the number of +results to display on the map. + +Optional command line parameters when running program: + +* `python mashup.py --help`: display help screen. +* `python mashup.py --low-to-high`: display the lowest health inspection +scores for your selected sorting criteria. Defaults to 'high-to-low' + + +