From 25e5b3a8d0a4cf75cbf48f5bf54ae2f18832b38a Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Mon, 16 Feb 2015 22:58:11 -0700 Subject: [PATCH 01/24] basic sql2csv --- 24_sql2csv.py | 17 +++++++++++++++++ readme.md | 3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 24_sql2csv.py diff --git a/24_sql2csv.py b/24_sql2csv.py new file mode 100644 index 0000000..19d0c42 --- /dev/null +++ b/24_sql2csv.py @@ -0,0 +1,17 @@ +import sys +import csv +import sqlite3 + +if len(sys.argv) < 3: + print "Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0]) + exit() + +conn = sqlite3.connect(sys.argv[1]) +cur = conn.cursor() +data = cur.execute("SELECT * FROM {0}".format(sys.argv[2])) + +with open('output.csv', 'wb') as f: + writer = csv.writer(f) + writer.writerows(data) + +conn.close() diff --git a/readme.md b/readme.md index 2e3022d..33caedd 100644 --- a/readme.md +++ b/readme.md @@ -22,4 +22,5 @@ 1. **20_restore_file_from_git.py**: Restore file from Git History 1. **21_twitter_bot.py**: Twitter Bot 1. **22_git_tag.py**: Create Git Tag based on a commit -1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working. \ No newline at end of file +1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working +1. **24_sql2csv.py**: SQL to CSV. \ No newline at end of file From 5bb36799013bd5cf1762ff8ba3ee41eaa3c6abbb Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Wed, 18 Feb 2015 11:56:18 -0700 Subject: [PATCH 02/24] added ip -> geolocation script --- 25_ip2geolocation.py | 56 ++++++++++++++++++++++++++++++++++++++++++++ 25_sample_csv.csv | 6 +++++ readme.md | 3 ++- 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 25_ip2geolocation.py create mode 100644 25_sample_csv.csv diff --git a/25_ip2geolocation.py b/25_ip2geolocation.py new file mode 100644 index 0000000..f312989 --- /dev/null +++ b/25_ip2geolocation.py @@ -0,0 +1,56 @@ +import csv +import requests + + +def get_addresses(filename): + """ + Given a CSV file, this function returns a list of lists + where each element (list) in the outer list contains the + row info from the csv file. + """ + all_addresses = [] + with open(filename, 'rb') as f: + reader = csv.reader(f) + for row in reader: + all_addresses.append(row) + return all_addresses + + +def get_geolocation(all_the_ip_address): + """ + Given a list of lists from `get_addresses()`, this function + returns an updated lists of lists containing the geolocation. + """ + print("Getting geo information...") + updated_addresses = [] + counter = 1 + # update header + header_row = all_the_ip_address.pop(0) + header_row.extend(['Country', 'City']) + # get geolocation + for line in all_the_ip_address: + print "Grabbing geo info for row # {0}".format(counter) + r = requests.get('https://freegeoip.net/json/%7B0%7D'.format(line[0])) + line.extend([str(r.json()['country_name']), str(r.json()['city'])]) + updated_addresses.append(line) + counter += 1 + updated_addresses.insert(0, header_row) + return updated_addresses + + +def create_csv(updated_address_list): + """ + Given the updated lists of lists from `get_geolocation()`, this function + creates a new CSV. + """ + with open('output.csv', 'wb') as f: + writer = csv.writer(f) + writer.writerows(updated_address_list) + print "All done!" + + +if __name__ == '__main__': + csv_file = '25_sample_csv.csv' + all_the_ip_address = get_addresses(csv_file) + updated_address_list = get_geolocation(all_the_ip_address) + create_csv(updated_address_list) diff --git a/25_sample_csv.csv b/25_sample_csv.csv new file mode 100644 index 0000000..9943c95 --- /dev/null +++ b/25_sample_csv.csv @@ -0,0 +1,6 @@ +IP Address,Full Name,Id,Email +162.252.85.172,Virgie Simonis,0,Tatyana_Barton@domenico.net +208.110.83.202,Tyrese Bartoletti,1,Birdie.Greenholt@annetta.co.uk +108.162.199.95,Markus Sanford,2,Lela_Homenick@philip.net +169.228.182.227,Anastasia Sawayn,3,Abe@camylle.name +184.72.242.188,Ashly Howe,5,Kieran.Bashirian@ansley.com \ No newline at end of file diff --git a/readme.md b/readme.md index 33caedd..ad5d157 100644 --- a/readme.md +++ b/readme.md @@ -23,4 +23,5 @@ 1. **21_twitter_bot.py**: Twitter Bot 1. **22_git_tag.py**: Create Git Tag based on a commit 1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working -1. **24_sql2csv.py**: SQL to CSV. \ No newline at end of file +1. **24_sql2csv.py**: SQL to CSV. +1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. \ No newline at end of file From 0bb4c8c25500f35feb166fb5cfc29e8e66d8f5fe Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 17 May 2015 03:49:35 -0600 Subject: [PATCH 03/24] added stock scraper, converted all scripts to python 2/3 compatibility --- .gitignore | 4 ++- 02_find_all_links.py | 10 +++---- 03_simple_twitter_manager.py | 19 +++++++------ 04_rename_with_slice.py | 6 ++-- 05_load_json_without_dupes.py | 8 ++---- 06_execution_time.py | 7 +++-- 07_benchmark_permissions_loading_django.py | 6 ++-- 08_basic_email_web_crawler.py | 15 ++++++---- 09_basic_link_web_crawler.py | 13 +++++---- 10_find_files_recursively.py | 12 ++++---- 11_optimize_images_with_wand.py | 15 +++++----- 12_csv_split.py | 8 +++--- 13_random_name_generator.py | 4 +-- 15_check_my_environment.py | 6 ++-- 18_zipper.py | 2 +- 20_restore_file_from_git.py | 4 +-- 22_git_tag.py | 2 +- 24_sql2csv.py | 2 +- 25_ip2geolocation.py | 13 ++++++--- 26_stock_scraper.py | 32 ++++++++++++++++++++++ readme.md | 3 +- requirements.txt | 10 +++++++ 22 files changed, 129 insertions(+), 72 deletions(-) create mode 100644 26_stock_scraper.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 449fe6d..be6b6be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .pyc .DS_Store -_tmp \ No newline at end of file +_tmp +env +__pycache__ \ No newline at end of file diff --git a/02_find_all_links.py b/02_find_all_links.py index 76a7c99..37dff11 100644 --- a/02_find_all_links.py +++ b/02_find_all_links.py @@ -1,18 +1,18 @@ -import urllib2 +import requests import re # get url -url =raw_input('Enter a URL (include `http://`): ') +url = input('Enter a URL (include `http://`): ') # connect to the url -website = urllib2.urlopen(url) +website = requests.get(url) # read html -html = website.read() +html = website.text # use re.findall to grab all the links links = re.findall('"((http|ftp)s?://.*?)"', html) # output links for link in links: - print link[0] \ No newline at end of file + print(link[0]) diff --git a/03_simple_twitter_manager.py b/03_simple_twitter_manager.py index b7e4a51..e39a20b 100644 --- a/03_simple_twitter_manager.py +++ b/03_simple_twitter_manager.py @@ -1,25 +1,28 @@ import twitter - - + + TWITTER_CONSUMER_KEY = 'XXX' TWITTER_CONSUMER_SECRET = 'XXX' TWITTER_ACCESS_TOKEN_KEY = 'XXX' TWITTER_ACCESS_TOKEN_SECRET = 'XXX' - + twitter_api = twitter.Api( consumer_key=TWITTER_CONSUMER_KEY, consumer_secret=TWITTER_CONSUMER_SECRET, access_token_key=TWITTER_ACCESS_TOKEN_KEY, access_token_secret=TWITTER_ACCESS_TOKEN_SECRET ) - + if __name__ == '__main__': follower_ids = twitter_api.GetFollowerIDs() following_ids = twitter_api.GetFriendIDs() - zombie_follows = [following_id for following_id in following_ids if following_id not in follower_ids] - - confirm = raw_input("Are you sure you want to unfollow %s tweeps [y|n]? " % (len(zombie_follows))) + zombie_follows = [following_id for following_id in + following_ids if following_id not in follower_ids] + + confirm = raw_input( + "Are you sure you want to unfollow {0} tweeps [y|n]? ".format( + (len(zombie_follows)))) if confirm.lower() == 'y': for id in zombie_follows: user = twitter_api.DestroyFriendship(user_id=id) - print "Unfollowed %s" % (user.screen_name) \ No newline at end of file + print("Unfollowed {0}".format(user.screen_name)) diff --git a/04_rename_with_slice.py b/04_rename_with_slice.py index 2ff84f7..dd849ef 100644 --- a/04_rename_with_slice.py +++ b/04_rename_with_slice.py @@ -8,7 +8,7 @@ new_file_name = file_name[:-6] + extension try: os.rename(file, new_file_name) - except OSError, e: - print e + except OSError as e: + print(e) else: - print "Renamed {} to {}".format(file, new_file_name) + print("Renamed {} to {}".format(file, new_file_name)) diff --git a/05_load_json_without_dupes.py b/05_load_json_without_dupes.py index 5b767a2..2cbe318 100644 --- a/05_load_json_without_dupes.py +++ b/05_load_json_without_dupes.py @@ -1,11 +1,9 @@ -import json - def dict_raise_on_duplicates(ordered_pairs): """reject duplicate keys""" my_dict = dict() for key, values in ordered_pairs: if key in my_dict: - raise ValueError("Duplicate key: {}".format(key,)) + raise ValueError("Duplicate key: {}".format(key,)) else: - my_dict[key] = values - return my_dict \ No newline at end of file + my_dict[key] = values + return my_dict diff --git a/06_execution_time.py b/06_execution_time.py index 95d5ca4..9614bbd 100644 --- a/06_execution_time.py +++ b/06_execution_time.py @@ -13,6 +13,7 @@ import time +import random class ExecutionTime: @@ -25,9 +26,9 @@ def duration(self): # ---- run code ---- # -import random timer = ExecutionTime() sample_list = list() -my_list = [random.randint(1, 888898) for num in xrange(1, 1000000) if num % 2 == 0] -print 'Finished in {} seconds.'.format(timer.duration()) \ No newline at end of file +my_list = [random.randint(1, 888898) for num in + range(1, 1000000) if num % 2 == 0] +print('Finished in {} seconds.'.format(timer.duration())) diff --git a/07_benchmark_permissions_loading_django.py b/07_benchmark_permissions_loading_django.py index 0e2e06b..e1e6900 100644 --- a/07_benchmark_permissions_loading_django.py +++ b/07_benchmark_permissions_loading_django.py @@ -14,8 +14,8 @@ def timed(*args, **kw): te = time.time() all_times.append(te - ts) - print all_times - print numpy.mean(all_times) + print(all_times) + print(numpy.mean(all_times)) return result return timed @@ -39,4 +39,4 @@ def load_new_perms(): while n < 10: create_new_db() load_new_perms() - n += 1 \ No newline at end of file + n += 1 diff --git a/08_basic_email_web_crawler.py b/08_basic_email_web_crawler.py index faca75f..9c6c58f 100644 --- a/08_basic_email_web_crawler.py +++ b/08_basic_email_web_crawler.py @@ -1,6 +1,9 @@ import requests import re -import urlparse +try: + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin # regex email_re = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)') @@ -20,13 +23,13 @@ def crawl(url): # Find links links = link_re.findall(req.text) - print "\nFound {} links".format(len(links)) + print("\nFound {} links".format(len(links))) # Search links for emails for link in links: # Get an absolute URL for a link - link = urlparse.urljoin(url, link) + link = urljoin(url, link) # Find all emails on current page result.update(email_re.findall(req.text)) @@ -36,7 +39,7 @@ def crawl(url): if __name__ == '__main__': emails = crawl('http://www.realpython.com/') - print "\nScrapped e-mail addresses:" + print("\nScrapped e-mail addresses:") for email in emails: - print email - print "\n" + print(email) + print("\n") diff --git a/09_basic_link_web_crawler.py b/09_basic_link_web_crawler.py index 4531ac3..87e2fab 100644 --- a/09_basic_link_web_crawler.py +++ b/09_basic_link_web_crawler.py @@ -1,6 +1,9 @@ import requests import re -import urlparse +try: + from urllib.parse import urljoin +except ImportError: + from urlparse import urljoin # regex link_re = re.compile(r'href="http://github.com/(.*?)"') @@ -17,17 +20,15 @@ def crawl(url): # Find links links = link_re.findall(req.text) - print "\nFound {} links".format(len(links)) + print("\nFound {} links".format(len(links))) # Search links for emails for link in links: # Get an absolute URL for a link - link = urlparse.urljoin(url, link) + link = urljoin(url, link) - print link - + print(link) if __name__ == '__main__': crawl('http://www.realpython.com/') - diff --git a/10_find_files_recursively.py b/10_find_files_recursively.py index 7251b10..91cd73c 100644 --- a/10_find_files_recursively.py +++ b/10_find_files_recursively.py @@ -2,7 +2,7 @@ import os # constants -PATH = '/../../../..' +PATH = './' PATTERN = '*.py' @@ -14,18 +14,18 @@ def get_file_names(filepath, pattern): # matches.append(os.path.join(root, filename)) # full path matches.append(os.path.join(filename)) # just file name if matches: - print "Found {} files:".format(len(matches)) + print("Found {} files:".format(len(matches))) output_files(matches) else: - print "No files found." + print("No files found.") else: - print "Sorry that path does not exist. Try again." + print("Sorry that path does not exist. Try again.") def output_files(list_of_files): for filename in list_of_files: - print filename + print(filename) if __name__ == '__main__': - all_files = get_file_names(PATH, PATTERN) \ No newline at end of file + all_files = get_file_names(PATH, PATTERN) diff --git a/11_optimize_images_with_wand.py b/11_optimize_images_with_wand.py index c3449fd..a95b8b0 100644 --- a/11_optimize_images_with_wand.py +++ b/11_optimize_images_with_wand.py @@ -1,9 +1,9 @@ import fnmatch import os -# sudo pip install Wand +# pip install Wand from wand.image import Image -# sudo pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz +# pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz from hurry.filesize import size @@ -19,12 +19,13 @@ def get_image_file_names(filepath, pattern): for filename in fnmatch.filter(filenames, pattern): matches.append(os.path.join(root, filename)) # full path if matches: - print "Found {} files, with a total file size of {}.".format(len(matches), get_total_size(matches)) + print("Found {} files, with a total file size of {}.".format( + len(matches), get_total_size(matches))) return matches else: - print "No files found." + print("No files found.") else: - print "Sorry that path does not exist. Try again." + print("Sorry that path does not exist. Try again.") def get_total_size(list_of_image_names): @@ -35,7 +36,7 @@ def get_total_size(list_of_image_names): def resize_images(list_of_image_names): - print "Optimizing ... " + print("Optimizing ... ") for index, image_name in enumerate(list_of_image_names): with open(image_name) as f: image_binary = f.read() @@ -43,7 +44,7 @@ def resize_images(list_of_image_names): if img.height >= 600: img.transform(resize='x600') img.save(filename=image_name) - print "Optimization complete." + print("Optimization complete.") if __name__ == '__main__': diff --git a/12_csv_split.py b/12_csv_split.py index 65c698c..43ed1ee 100644 --- a/12_csv_split.py +++ b/12_csv_split.py @@ -117,10 +117,10 @@ def parse_file(arguments): writer = writer.writerows(chunk) # Output info - print "" - print "Chunk # {}:".format(current_chunk) - print "Filepath: {}".format(current_output) - print "# of rows: {}".format(len(chunk)) + print("") + print("Chunk # {}:".format(current_chunk)) + print("Filepath: {}".format(current_output)) + print("# of rows: {}".format(len(chunk))) # Create new chunk current_chunk += 1 diff --git a/13_random_name_generator.py b/13_random_name_generator.py index 0719eec..6f0a00a 100644 --- a/13_random_name_generator.py +++ b/13_random_name_generator.py @@ -10,7 +10,7 @@ def random_name_generator(first, second, x): - number of random names """ names = [] - for i in xrange(0, int(x)): + for i in range(0, int(x)): random_first = randint(0, len(first)-1) random_last = randint(0, len(second)-1) names.append("{0} {1}".format( @@ -23,4 +23,4 @@ def random_name_generator(first, second, x): first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"] last_names = ["Smith", "Jones", "Brighton", "Taylor"] names = random_name_generator(first_names, last_names, 5) -print '\n'.join(names) +print('\n'.join(names)) diff --git a/15_check_my_environment.py b/15_check_my_environment.py index 11017c4..62e0b8d 100644 --- a/15_check_my_environment.py +++ b/15_check_my_environment.py @@ -11,7 +11,7 @@ def __init__(self, configFile): pass def process(self): - print "ok" + print("ok") if __name__ == "__main__": m = Main(some_script.CONFIGFILE) @@ -39,7 +39,7 @@ def get_config_file(): if CONFIGFILE is None: sys.exit("Configuration error! Unknown environment set. \ Edit config.py and set appropriate environment") -print "Config file: {}".format(CONFIGFILE) +print("Config file: {}".format(CONFIGFILE)) if not os.path.exists(CONFIGFILE): sys.exit("Configuration error! Config file does not exist") -print "Config ok ...." +print("Config ok ....") diff --git a/18_zipper.py b/18_zipper.py index a350a70..43c956d 100755 --- a/18_zipper.py +++ b/18_zipper.py @@ -3,7 +3,7 @@ from zipfile import ZipFile -#set file name and time of creation +# set file name and time of creation today = datetime.now() file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip' dir_name = 'tmp/' # update path diff --git a/20_restore_file_from_git.py b/20_restore_file_from_git.py index f692d9d..b1f581b 100644 --- a/20_restore_file_from_git.py +++ b/20_restore_file_from_git.py @@ -1,9 +1,9 @@ from subprocess import check_output, call -file_name = str(raw_input('Enter the file name: ')) +file_name = str(input('Enter the file name: ')) commit = check_output(["git", "rev-list", "-n", "1", "HEAD", "--", file_name]) -print str(commit).rstrip() +print(str(commit).rstrip()) call(["git", "checkout", str(commit).rstrip()+"~1", file_name]) diff --git a/22_git_tag.py b/22_git_tag.py index 283f495..4849c07 100644 --- a/22_git_tag.py +++ b/22_git_tag.py @@ -10,5 +10,5 @@ subprocess.call(command, shell=True) subprocess.call('git push --tags', shell=True) else: - print 'usage: tag.py TAG_NAME COMMIT' + print('usage: tag.py TAG_NAME COMMIT') sys.exit(1) diff --git a/24_sql2csv.py b/24_sql2csv.py index 19d0c42..4e8f484 100644 --- a/24_sql2csv.py +++ b/24_sql2csv.py @@ -3,7 +3,7 @@ import sqlite3 if len(sys.argv) < 3: - print "Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0]) + print("Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0])) exit() conn = sqlite3.connect(sys.argv[1]) diff --git a/25_ip2geolocation.py b/25_ip2geolocation.py index f312989..f593676 100644 --- a/25_ip2geolocation.py +++ b/25_ip2geolocation.py @@ -9,7 +9,7 @@ def get_addresses(filename): row info from the csv file. """ all_addresses = [] - with open(filename, 'rb') as f: + with open(filename, 'rt') as f: reader = csv.reader(f) for row in reader: all_addresses.append(row) @@ -29,7 +29,7 @@ def get_geolocation(all_the_ip_address): header_row.extend(['Country', 'City']) # get geolocation for line in all_the_ip_address: - print "Grabbing geo info for row # {0}".format(counter) + print("Grabbing geo info for row # {0}".format(counter)) r = requests.get('https://freegeoip.net/json/%7B0%7D'.format(line[0])) line.extend([str(r.json()['country_name']), str(r.json()['city'])]) updated_addresses.append(line) @@ -43,10 +43,15 @@ def create_csv(updated_address_list): Given the updated lists of lists from `get_geolocation()`, this function creates a new CSV. """ - with open('output.csv', 'wb') as f: + import sys + if sys.version_info >= (3, 0, 0): + f = open('output.csv', 'w', newline='') + else: + f = open('output.csv', 'wb') + with f: writer = csv.writer(f) writer.writerows(updated_address_list) - print "All done!" + print("All done!") if __name__ == '__main__': diff --git a/26_stock_scraper.py b/26_stock_scraper.py new file mode 100644 index 0000000..3e69cc2 --- /dev/null +++ b/26_stock_scraper.py @@ -0,0 +1,32 @@ +import requests +from lxml import html +from collections import defaultdict + + +def get_stocks(url): + # Make Request + page = requests.get(url) + # Parse/Scrape + tree = html.fromstring(page.text) + xpath = '//*[@id="mw-content-text"]/table[1]' + rows = tree.xpath(xpath)[0].findall("tr") + rows = [(row.getchildren()[0], row.getchildren()[3]) for row in rows[1:]] + rows = [(row[0].getchildren()[0].text, row[1].text) for row in rows] + industries = defaultdict(list) + for row in rows: + industries[row[1]].append(row[0]) + return industries + + +def output_data(data_dict): + for industry in data_dict: + print('\n'+industry) + print('-'*len(industry)) + for ticker in data_dict[industry]: + print(ticker) + + +if __name__ == '__main__': + url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies' + scraped_data = get_stocks(url) + output_data(scraped_data) diff --git a/readme.md b/readme.md index ad5d157..2e00620 100644 --- a/readme.md +++ b/readme.md @@ -24,4 +24,5 @@ 1. **22_git_tag.py**: Create Git Tag based on a commit 1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working 1. **24_sql2csv.py**: SQL to CSV. -1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. \ No newline at end of file +1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. +1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output he data. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d1a3d68 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +Flask==0.10.1 +Jinja2==2.7.3 +MarkupSafe==0.23 +Wand==0.4.0 +Werkzeug==0.10.4 +hurry.filesize==0.9 +itsdangerous==0.24 +lxml==3.4.4 +numpy==1.9.2 +requests==2.7.0 From 125a657ba89571fc08364765b6aaf15ea1661d61 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 13 Sep 2015 13:06:56 -0600 Subject: [PATCH 04/24] updated gitignore and requirements.txt, added send sms script --- .gitignore | 3 ++- 27_send_sms.py | 12 ++++++++++++ readme.md | 3 ++- requirements.txt | 1 + 4 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 27_send_sms.py diff --git a/.gitignore b/.gitignore index be6b6be..3873c3a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .DS_Store _tmp env -__pycache__ \ No newline at end of file +venv +__pycache__ diff --git a/27_send_sms.py b/27_send_sms.py new file mode 100644 index 0000000..0ad1d58 --- /dev/null +++ b/27_send_sms.py @@ -0,0 +1,12 @@ +import requests + +message = raw_input('Enter a Message: ') +number = raw_input('Enter the phone number: ') + + +payload = {'number': number, 'message': message} +r = requests.post("http://textbelt.com/text", data=payload) +if r.json()['success']: + print('Success!') +else: + print('Error!') diff --git a/readme.md b/readme.md index 2e00620..19551da 100644 --- a/readme.md +++ b/readme.md @@ -25,4 +25,5 @@ 1. **23_flask_session_test.py**: Just a simple app to see if the sessions are working 1. **24_sql2csv.py**: SQL to CSV. 1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. -1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output he data. +1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output the data. +1. **27_send_sms.py**: Send SMS message via [TextBelt](http://textbelt.com/) diff --git a/requirements.txt b/requirements.txt index d1a3d68..baafbf3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ itsdangerous==0.24 lxml==3.4.4 numpy==1.9.2 requests==2.7.0 +wheel==0.24.0 From b287df6e1811c4a06bae3b04b49a2f7057406259 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 13 Sep 2015 18:26:43 -0600 Subject: [PATCH 05/24] income tax calculator --- 28_income_tax_calculator.py | 23 +++++++++++++++++++++++ readme.md | 1 + 2 files changed, 24 insertions(+) create mode 100644 28_income_tax_calculator.py diff --git a/28_income_tax_calculator.py b/28_income_tax_calculator.py new file mode 100644 index 0000000..3fc0758 --- /dev/null +++ b/28_income_tax_calculator.py @@ -0,0 +1,23 @@ +import requests + +headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + 'Accept': 'application/json', +} + +data = { + 'pay_rate': '10000', + 'filing_status': 'single', + 'pay_periods': 1, + 'state': 'CO', + 'year': + '2014' +} + +r = requests.post( + 'http://taxee.io/api/v1/calculate/2014', + data=data, + headers=headers +) + +print(r.text) diff --git a/readme.md b/readme.md index 19551da..722cfcb 100644 --- a/readme.md +++ b/readme.md @@ -27,3 +27,4 @@ 1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. 1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output the data. 1. **27_send_sms.py**: Send SMS message via [TextBelt](http://textbelt.com/) +1. **28_income_tax_calculator.py**: Income tax calcuator via [Taxee](http://taxee.io/) From 24d6113bd74ce9e8b1e6bbe5d13ec4e4e49d2c92 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 11 Oct 2015 09:28:50 -0600 Subject: [PATCH 06/24] added json to yaml script --- 29_json_test.json | 31 +++++++++++++++++++++++++++++++ 29_json_to_yaml.py | 16 ++++++++++++++++ readme.md | 5 +++-- requirements.txt | 10 +--------- 4 files changed, 51 insertions(+), 11 deletions(-) create mode 100644 29_json_test.json create mode 100644 29_json_to_yaml.py diff --git a/29_json_test.json b/29_json_test.json new file mode 100644 index 0000000..ebe6459 --- /dev/null +++ b/29_json_test.json @@ -0,0 +1,31 @@ +{ + "colorsArray":[{ + "colorName":"red", + "hexValue":"#f00" + }, + { + "colorName":"green", + "hexValue":"#0f0" + }, + { + "colorName":"blue", + "hexValue":"#00f" + }, + { + "colorName":"cyan", + "hexValue":"#0ff" + }, + { + "colorName":"magenta", + "hexValue":"#f0f" + }, + { + "colorName":"yellow", + "hexValue":"#ff0" + }, + { + "colorName":"black", + "hexValue":"#000" + } + ] +} diff --git a/29_json_to_yaml.py b/29_json_to_yaml.py new file mode 100644 index 0000000..b22f64d --- /dev/null +++ b/29_json_to_yaml.py @@ -0,0 +1,16 @@ +import sys +import json +import yaml + +""" +Example usage: + +$ python 29_json_to_yaml.py 29_json_test.json +""" + +# load json data +json_data = json.loads(open(sys.argv[1]).read()) +# convert unicode to string +converted_json_data = json.dumps(json_data) +# output yaml +print(yaml.dump(yaml.load(converted_json_data), default_flow_style=False)) diff --git a/readme.md b/readme.md index 722cfcb..cf967f4 100644 --- a/readme.md +++ b/readme.md @@ -4,7 +4,7 @@ 1. **02_find_all_links.py**: get all links from a webpage 1. **03_simple_twitter_manager.py**: accessing the Twitter API, example functions 1. **04_rename_with_slice.py**: rename group of files, within a single directory, using slice -1. **05_load_json_without_dupes.py**: load json, convert to dict, raise error if there is a duplicate key +1. **05_load_json_without_dupes.py**: load JSON, convert to dict, raise error if there is a duplicate key 1. **06_execution_time.py**: class used for timing execution of code 1. **07_benchmark_permissions_loading_django.py**: benchmark loading of permissions in Django 1. **08_basic_email_web_crawler.py**: web crawler for grabbing emails from a website @@ -27,4 +27,5 @@ 1. **25_ip2geolocation.py**: Given a CSV file with an ip address (see sample - *25_sample_csv.csv*), return the geolocation based on the ip. 1. **26_stock_scraper.py**: Scrape the S&P 500 Companies list from Wikipedia, then output the data. 1. **27_send_sms.py**: Send SMS message via [TextBelt](http://textbelt.com/) -1. **28_income_tax_calculator.py**: Income tax calcuator via [Taxee](http://taxee.io/) +1. **28_income_tax_calculator.py**: Income tax calculator via [Taxee](http://taxee.io/) +1. **29_json_to_yaml.py**: Convert JSON to YAML diff --git a/requirements.txt b/requirements.txt index baafbf3..9d61831 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,3 @@ -Flask==0.10.1 -Jinja2==2.7.3 -MarkupSafe==0.23 -Wand==0.4.0 -Werkzeug==0.10.4 -hurry.filesize==0.9 -itsdangerous==0.24 -lxml==3.4.4 -numpy==1.9.2 +PyYAML==3.11 requests==2.7.0 wheel==0.24.0 From f6bece3c902cd71ef37ec0118cb1f9f729985c5a Mon Sep 17 00:00:00 2001 From: Aaron Delaney Date: Thu, 15 Oct 2015 22:51:07 +0100 Subject: [PATCH 07/24] Add +x on all files for owner --- 01_remove_all_pyc.md | 0 02_find_all_links.py | 0 03_simple_twitter_manager.py | 0 04_rename_with_slice.py | 0 05_load_json_without_dupes.py | 0 06_execution_time.py | 0 07_benchmark_permissions_loading_django.py | 0 08_basic_email_web_crawler.py | 0 09_basic_link_web_crawler.py | 0 10_find_files_recursively.py | 0 11_optimize_images_with_wand.py | 0 12_csv_split.py | 0 12_sample_csv.csv | 0 13_random_name_generator.py | 0 14_html_to_markdown.sh | 0 15_check_my_environment.py | 0 16_jinja_quick_load.py | 0 17_rewrite_git_history.md | 0 19_tsv-to-csv.py | 0 20_restore_file_from_git.py | 0 21_twitter_bot.py | 0 22_git_tag.py | 0 23_flask_session_test.py | 0 24_sql2csv.py | 0 25_ip2geolocation.py | 0 25_sample_csv.csv | 0 26_stock_scraper.py | 0 27_send_sms.py | 0 28_income_tax_calculator.py | 0 29_json_test.json | 0 29_json_to_yaml.py | 0 readme.md | 0 requirements.txt | 0 33 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 01_remove_all_pyc.md mode change 100644 => 100755 02_find_all_links.py mode change 100644 => 100755 03_simple_twitter_manager.py mode change 100644 => 100755 04_rename_with_slice.py mode change 100644 => 100755 05_load_json_without_dupes.py mode change 100644 => 100755 06_execution_time.py mode change 100644 => 100755 07_benchmark_permissions_loading_django.py mode change 100644 => 100755 08_basic_email_web_crawler.py mode change 100644 => 100755 09_basic_link_web_crawler.py mode change 100644 => 100755 10_find_files_recursively.py mode change 100644 => 100755 11_optimize_images_with_wand.py mode change 100644 => 100755 12_csv_split.py mode change 100644 => 100755 12_sample_csv.csv mode change 100644 => 100755 13_random_name_generator.py mode change 100644 => 100755 14_html_to_markdown.sh mode change 100644 => 100755 15_check_my_environment.py mode change 100644 => 100755 16_jinja_quick_load.py mode change 100644 => 100755 17_rewrite_git_history.md mode change 100644 => 100755 19_tsv-to-csv.py mode change 100644 => 100755 20_restore_file_from_git.py mode change 100644 => 100755 21_twitter_bot.py mode change 100644 => 100755 22_git_tag.py mode change 100644 => 100755 23_flask_session_test.py mode change 100644 => 100755 24_sql2csv.py mode change 100644 => 100755 25_ip2geolocation.py mode change 100644 => 100755 25_sample_csv.csv mode change 100644 => 100755 26_stock_scraper.py mode change 100644 => 100755 27_send_sms.py mode change 100644 => 100755 28_income_tax_calculator.py mode change 100644 => 100755 29_json_test.json mode change 100644 => 100755 29_json_to_yaml.py mode change 100644 => 100755 readme.md mode change 100644 => 100755 requirements.txt diff --git a/01_remove_all_pyc.md b/01_remove_all_pyc.md old mode 100644 new mode 100755 diff --git a/02_find_all_links.py b/02_find_all_links.py old mode 100644 new mode 100755 diff --git a/03_simple_twitter_manager.py b/03_simple_twitter_manager.py old mode 100644 new mode 100755 diff --git a/04_rename_with_slice.py b/04_rename_with_slice.py old mode 100644 new mode 100755 diff --git a/05_load_json_without_dupes.py b/05_load_json_without_dupes.py old mode 100644 new mode 100755 diff --git a/06_execution_time.py b/06_execution_time.py old mode 100644 new mode 100755 diff --git a/07_benchmark_permissions_loading_django.py b/07_benchmark_permissions_loading_django.py old mode 100644 new mode 100755 diff --git a/08_basic_email_web_crawler.py b/08_basic_email_web_crawler.py old mode 100644 new mode 100755 diff --git a/09_basic_link_web_crawler.py b/09_basic_link_web_crawler.py old mode 100644 new mode 100755 diff --git a/10_find_files_recursively.py b/10_find_files_recursively.py old mode 100644 new mode 100755 diff --git a/11_optimize_images_with_wand.py b/11_optimize_images_with_wand.py old mode 100644 new mode 100755 diff --git a/12_csv_split.py b/12_csv_split.py old mode 100644 new mode 100755 diff --git a/12_sample_csv.csv b/12_sample_csv.csv old mode 100644 new mode 100755 diff --git a/13_random_name_generator.py b/13_random_name_generator.py old mode 100644 new mode 100755 diff --git a/14_html_to_markdown.sh b/14_html_to_markdown.sh old mode 100644 new mode 100755 diff --git a/15_check_my_environment.py b/15_check_my_environment.py old mode 100644 new mode 100755 diff --git a/16_jinja_quick_load.py b/16_jinja_quick_load.py old mode 100644 new mode 100755 diff --git a/17_rewrite_git_history.md b/17_rewrite_git_history.md old mode 100644 new mode 100755 diff --git a/19_tsv-to-csv.py b/19_tsv-to-csv.py old mode 100644 new mode 100755 diff --git a/20_restore_file_from_git.py b/20_restore_file_from_git.py old mode 100644 new mode 100755 diff --git a/21_twitter_bot.py b/21_twitter_bot.py old mode 100644 new mode 100755 diff --git a/22_git_tag.py b/22_git_tag.py old mode 100644 new mode 100755 diff --git a/23_flask_session_test.py b/23_flask_session_test.py old mode 100644 new mode 100755 diff --git a/24_sql2csv.py b/24_sql2csv.py old mode 100644 new mode 100755 diff --git a/25_ip2geolocation.py b/25_ip2geolocation.py old mode 100644 new mode 100755 diff --git a/25_sample_csv.csv b/25_sample_csv.csv old mode 100644 new mode 100755 diff --git a/26_stock_scraper.py b/26_stock_scraper.py old mode 100644 new mode 100755 diff --git a/27_send_sms.py b/27_send_sms.py old mode 100644 new mode 100755 diff --git a/28_income_tax_calculator.py b/28_income_tax_calculator.py old mode 100644 new mode 100755 diff --git a/29_json_test.json b/29_json_test.json old mode 100644 new mode 100755 diff --git a/29_json_to_yaml.py b/29_json_to_yaml.py old mode 100644 new mode 100755 diff --git a/readme.md b/readme.md old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 From 44a9b7d2e8354903aad21e998f7d7e9580fa3c64 Mon Sep 17 00:00:00 2001 From: Taranjeet Date: Fri, 16 Oct 2015 10:42:38 +0530 Subject: [PATCH 08/24] grammatical error : remove_all_pyc.md --- 01_remove_all_pyc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/01_remove_all_pyc.md b/01_remove_all_pyc.md index 2891215..f666e1a 100644 --- a/01_remove_all_pyc.md +++ b/01_remove_all_pyc.md @@ -6,4 +6,4 @@ To recursively remove all those pesky *.pyc* files from a git repo, run this com $ find . -name "*.pyc" -exec git rm -f {} \; ``` -Then make sure to add a *.gitignore* and the root of the repo and add the line: `*.pyc` \ No newline at end of file +Then make sure to add a *.gitignore* in the root of the repo and add the line: `*.pyc` \ No newline at end of file From 436f3119f9ac28c559f96414ba3b3f85cbddb2ef Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 25 Oct 2015 15:20:53 -0600 Subject: [PATCH 09/24] Added fullcontact api --- 30_fullcontact.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++ readme.md | 1 + 2 files changed, 50 insertions(+) create mode 100644 30_fullcontact.py diff --git a/30_fullcontact.py b/30_fullcontact.py new file mode 100644 index 0000000..a612648 --- /dev/null +++ b/30_fullcontact.py @@ -0,0 +1,49 @@ +import sys +import requests + +""" + +1. pip install requests +2. Obtain an API key: https://www.fullcontact.com/developer/pricing/ + +Example usage: + +$ python 30_fullcontact.py email SOME@EMAIL.COM +$ python 30_fullcontact.py twitter TWITTER_HANDLE +""" + + +# constants + +API_KEY = 'GET YOUR OWN' +BASE_URL = 'http://api.fullcontact.com/v2/person.json' + + +# helpers + +def get_arguments(): + if len(sys.argv) is 3: + return { + 'media': sys.argv[1], + 'user_info': sys.argv[2] + } + else: + print('Specify at least 1 argument') + sys.exit() + + +def call_api(contact): + url = BASE_URL + '?{0}={1}&apiKey={2}'.format( + contact['media'], contact['user_info'], API_KEY) + r = requests.get(url) + if r.status_code == 200: + return r.text + else: + return "Sorry, no results found." + + +# main + +if __name__ == "__main__": + media = get_arguments() + print(call_api(media)) diff --git a/readme.md b/readme.md index cf967f4..0bfba2f 100644 --- a/readme.md +++ b/readme.md @@ -29,3 +29,4 @@ 1. **27_send_sms.py**: Send SMS message via [TextBelt](http://textbelt.com/) 1. **28_income_tax_calculator.py**: Income tax calculator via [Taxee](http://taxee.io/) 1. **29_json_to_yaml.py**: Convert JSON to YAML +1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API From 761e0ecec220daaf3a0c7dfcd1f36949e670ef2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?RayKon=E2=88=9E?= Date: Wed, 2 Dec 2015 21:51:55 +0530 Subject: [PATCH 10/24] Update 08_basic_email_web_crawler.py This is a much simpler version of the script(easily understandable). --- 08_basic_email_web_crawler.py | 47 +++++++++++------------------------ 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/08_basic_email_web_crawler.py b/08_basic_email_web_crawler.py index 9c6c58f..a7dbbce 100644 --- a/08_basic_email_web_crawler.py +++ b/08_basic_email_web_crawler.py @@ -1,45 +1,26 @@ import requests import re -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin -# regex -email_re = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)') -link_re = re.compile(r'href="http://github.com/(.*?)"') +#get url +#url=input('Enter a URL (include 'http://'):')--this is wrong +url = input('Enter a URL (include `http://`): ') -def crawl(url): +#connect to the url +website=requests.get(url) - result = set() +#read html +html=website.text - req = requests.get(url) - # Check if successful - if(req.status_code != 200): - return [] +#use re.findall to grab all the links +links = re.findall('"((http|ftp)s?://.*?)"', html) - # Find links - links = link_re.findall(req.text) +emails=re.findall('([\w\.,]+@[\w\.,]+\.\w+)',html) - print("\nFound {} links".format(len(links))) - # Search links for emails - for link in links: +#prints the number of links in the list +print("\nFound {} links".format(len(links))) - # Get an absolute URL for a link - link = urljoin(url, link) - - # Find all emails on current page - result.update(email_re.findall(req.text)) - - return result - -if __name__ == '__main__': - emails = crawl('http://www.realpython.com/') - - print("\nScrapped e-mail addresses:") - for email in emails: - print(email) - print("\n") +for email in emails: + print(email) From 7a33539c8ca0686a6f5c0c54e0bf00b3ebcf9992 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Tue, 26 Jan 2016 14:00:00 +0100 Subject: [PATCH 11/24] added youtube sentiment --- 31_youtube_sentiment.py | 77 +++++++++++++++++++++++++++++++++++++++++ readme.md | 1 + requirements.txt | 1 + 3 files changed, 79 insertions(+) create mode 100644 31_youtube_sentiment.py diff --git a/31_youtube_sentiment.py b/31_youtube_sentiment.py new file mode 100644 index 0000000..f0f2129 --- /dev/null +++ b/31_youtube_sentiment.py @@ -0,0 +1,77 @@ +import sys +import requests +from bs4 import BeautifulSoup as bs4 + +""" +Example usage: + +$ python 31_youtube_sentiment.py https://www.youtube.com/watch?v=_vrAjAHhUsA +""" + + +def get_arguments(): + if len(sys.argv) is 2: + return sys.argv[1] + else: + print('Specify at least 1 argument') + sys.exit() + + +def get_comments(url): + html = requests.get('https://plus.googleapis.com/u/0/_/widget/render/comments?first_party_property=YOUTUBE&href=' + url) + soup = bs4(html.text, 'html.parser') + return [comment.string for comment in soup.findAll('div', class_='Ct')] + + +def calculate_sentiment(comments): + positive = 0 + negative = 0 + negative_words = [ + 'hate', 'hated', 'dislike', 'disliked', 'awful', 'terrible', 'bad', + 'painful', 'worst', 'suck', 'rubbish', 'sad', 'sodding' + ] + positive_words = [ + 'love', 'loved', 'like', 'liked', 'awesome', 'amazing', 'good', + 'great', 'excellent', 'brilliant', 'cool' + ] + for comment in comments: + if comment is None: + continue + else: + for word in comment.split(' '): + if word in negative_words: + negative += 1 + if word in positive_words: + positive += 1 + return {'positive': positive, 'negative': negative} + + +def main(): + url = get_arguments() + if url: + comments = get_comments(url) + if len(comments) <= 0: + print('This video has no comments.') + sys.exit() + sentiment = calculate_sentiment(comments) + positive_score = sentiment['positive'] + negative_score = sentiment['negative'] + total_score = positive_score + negative_score + if positive_score > negative_score: + print('This video is generally positive:') + print('{0} positive / {1} total hits'.format( + positive_score, total_score)) + elif negative_score > positive_score: + print('This video is generally negative:') + print ('{0} negative / {1} total hits'.format( + negative_score, total_score)) + else: + print('This video is mutual:') + print('{0} positive {1} negative'.format( + positive_score, negative_score)) + else: + print('No url supplied') + + +if __name__ == '__main__': + main() diff --git a/readme.md b/readme.md index 0bfba2f..363d345 100644 --- a/readme.md +++ b/readme.md @@ -30,3 +30,4 @@ 1. **28_income_tax_calculator.py**: Income tax calculator via [Taxee](http://taxee.io/) 1. **29_json_to_yaml.py**: Convert JSON to YAML 1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API +1. **31_youtube_sentiment.py**: Calculate sentiment score from the comments of a Youtube video diff --git a/requirements.txt b/requirements.txt index 9d61831..b65ee68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +beautifulsoup4==4.4.1 PyYAML==3.11 requests==2.7.0 wheel==0.24.0 From 7591683535eca836d8d86974635e9c7979ab7abd Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Mon, 1 Feb 2016 22:33:14 -0700 Subject: [PATCH 12/24] added stock prices script --- 32_stock_scraper.py | 38 ++++++++++++++++++++++++++++++++++++++ readme.md | 1 + 2 files changed, 39 insertions(+) create mode 100644 32_stock_scraper.py diff --git a/32_stock_scraper.py b/32_stock_scraper.py new file mode 100644 index 0000000..3bc2e7d --- /dev/null +++ b/32_stock_scraper.py @@ -0,0 +1,38 @@ +import urllib.request +from bs4 import BeautifulSoup + + +def get_stock_tickers(): + req = urllib.request.Request( + 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies') + page = urllib.request.urlopen(req) + soup = BeautifulSoup(page, 'html.parser') + table = soup.find('table', {'class': 'wikitable sortable'}) + tickers = [] + for row in table.findAll('tr'): + col = row.findAll('td') + if len(col) > 0: + tickers.append(str(col[0].string.strip())) + tickers.sort() + return tickers + + +def get_stock_prices(ticker_list): + for ticker in ticker_list: + htmlfile = urllib.request.urlopen( + "http://finance.yahoo.com/q?s={0}".format(ticker) + ) + htmltext = htmlfile.read() + soup = BeautifulSoup(htmltext, 'html.parser') + htmlSelector = 'yfs_l84_{0}'.format(ticker.lower()) + for price in soup.find_all(id=htmlSelector): + print('{0} is {1}'.format(ticker, price.text)) + + +def main(): + all_tickers = get_stock_tickers() + get_stock_prices(all_tickers) + + +if __name__ == '__main__': + main() diff --git a/readme.md b/readme.md index 363d345..926be9d 100644 --- a/readme.md +++ b/readme.md @@ -31,3 +31,4 @@ 1. **29_json_to_yaml.py**: Convert JSON to YAML 1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API 1. **31_youtube_sentiment.py**: Calculate sentiment score from the comments of a Youtube video +1. **32_stock_scraper.py**: Get stock prices From 6613b13d4dba5bd7b1dfbe6759e4f9f6773be63d Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Thu, 18 Feb 2016 15:08:55 -0700 Subject: [PATCH 13/24] updated email crawler --- 08_basic_email_web_crawler.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/08_basic_email_web_crawler.py b/08_basic_email_web_crawler.py index a7dbbce..b56c747 100644 --- a/08_basic_email_web_crawler.py +++ b/08_basic_email_web_crawler.py @@ -1,26 +1,21 @@ import requests import re -#get url -#url=input('Enter a URL (include 'http://'):')--this is wrong +# get url url = input('Enter a URL (include `http://`): ') +# connect to the url +website = requests.get(url) -#connect to the url -website=requests.get(url) +# read html +html = website.text -#read html -html=website.text - - -#use re.findall to grab all the links +# use re.findall to grab all the links links = re.findall('"((http|ftp)s?://.*?)"', html) +emails = re.findall('([\w\.,]+@[\w\.,]+\.\w+)', html) -emails=re.findall('([\w\.,]+@[\w\.,]+\.\w+)',html) - -#prints the number of links in the list +# print the number of links in the list print("\nFound {} links".format(len(links))) - for email in emails: - print(email) + print(email) From 352f777cc309ae74e5692b6872720c8057da0462 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Wed, 23 Mar 2016 09:33:03 -0600 Subject: [PATCH 14/24] updated full contact script --- .gitignore | 1 + 30_fullcontact.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3873c3a..87031a3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ _tmp env venv __pycache__ +.env diff --git a/30_fullcontact.py b/30_fullcontact.py index a612648..3ee2822 100644 --- a/30_fullcontact.py +++ b/30_fullcontact.py @@ -1,3 +1,4 @@ +import os import sys import requests @@ -15,7 +16,7 @@ # constants -API_KEY = 'GET YOUR OWN' +API_KEY = os.environ.get('FULLCONTACT_API_KEY') BASE_URL = 'http://api.fullcontact.com/v2/person.json' From c3b10a3008aa5d7b356d70ddad4b373d9b975d98 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 28 Aug 2016 21:46:29 -0600 Subject: [PATCH 15/24] added country code to country name, continent name script --- 33_country_code.py | 51 ++ 33_country_codes.json | 1254 +++++++++++++++++++++++++++++++++++++++++ 33_sample_csv.csv | 109 ++++ data.csv | 107 ++++ 4 files changed, 1521 insertions(+) create mode 100644 33_country_code.py create mode 100644 33_country_codes.json create mode 100644 33_sample_csv.csv create mode 100644 data.csv diff --git a/33_country_code.py b/33_country_code.py new file mode 100644 index 0000000..134236c --- /dev/null +++ b/33_country_code.py @@ -0,0 +1,51 @@ +import csv +import sys +import json + +""" +Example usage: + +$ python 33_country_code.py 33_sample_csv.csv 33_country_codes.json +""" + + +def get_data(csv_file, json_file): + countryCodes = [] + countryNames = [] + continentNames = [] + with open(csv_file, 'rt') as file_one: + reader = csv.reader(file_one) + with open(json_file) as file_two: + json_data = json.load(file_two) + all_countries = json_data["country"] + for csv_row in reader: + for json_row in all_countries: + if csv_row[0] == json_row["countryCode"]: + countryCodes.append(json_row["countryCode"]) + countryNames.append(json_row["countryName"]) + continentNames.append(json_row["continentName"]) + + return [ + countryCodes, + countryNames, + continentNames + ] + + +def write_data(array_of_arrays): + with open('data.csv', 'wt') as csv_out: + writer = csv.writer(csv_out) + rows = zip( + array_of_arrays[0], + array_of_arrays[1], + array_of_arrays[2] + ) + for row in rows: + writer.writerow(row) + + +if __name__ == '__main__': + csv_file_name = sys.argv[1] + json_file_name = sys.argv[2] + data = get_data(csv_file_name, json_file_name) + write_data(data) diff --git a/33_country_codes.json b/33_country_codes.json new file mode 100644 index 0000000..4ec79e6 --- /dev/null +++ b/33_country_codes.json @@ -0,0 +1,1254 @@ +{ + "country": [ + { + "countryCode": "AD", + "countryName": "Andorra", + "continentName": "Europe" + }, + { + "countryCode": "AE", + "countryName": "United Arab Emirates", + "continentName": "Asia" + }, + { + "countryCode": "AF", + "countryName": "Afghanistan", + "continentName": "Asia" + }, + { + "countryCode": "AG", + "countryName": "Antigua and Barbuda", + "continentName": "North America" + }, + { + "countryCode": "AI", + "countryName": "Anguilla", + "continentName": "North America" + }, + { + "countryCode": "AL", + "countryName": "Albania", + "continentName": "Europe" + }, + { + "countryCode": "AM", + "countryName": "Armenia", + "continentName": "Asia" + }, + { + "countryCode": "AO", + "countryName": "Angola", + "continentName": "Africa" + }, + { + "countryCode": "AQ", + "countryName": "Antarctica", + "continentName": "Antarctica" + }, + { + "countryCode": "AR", + "countryName": "Argentina", + "continentName": "South America" + }, + { + "countryCode": "AS", + "countryName": "American Samoa", + "continentName": "Oceania" + }, + { + "countryCode": "AT", + "countryName": "Austria", + "continentName": "Europe" + }, + { + "countryCode": "AU", + "countryName": "Australia", + "continentName": "Oceania" + }, + { + "countryCode": "AW", + "countryName": "Aruba", + "continentName": "North America" + }, + { + "countryCode": "AX", + "countryName": "Åland", + "continentName": "Europe" + }, + { + "countryCode": "AZ", + "countryName": "Azerbaijan", + "continentName": "Asia" + }, + { + "countryCode": "BA", + "countryName": "Bosnia and Herzegovina", + "continentName": "Europe" + }, + { + "countryCode": "BB", + "countryName": "Barbados", + "continentName": "North America" + }, + { + "countryCode": "BD", + "countryName": "Bangladesh", + "continentName": "Asia" + }, + { + "countryCode": "BE", + "countryName": "Belgium", + "continentName": "Europe" + }, + { + "countryCode": "BF", + "countryName": "Burkina Faso", + "continentName": "Africa" + }, + { + "countryCode": "BG", + "countryName": "Bulgaria", + "continentName": "Europe" + }, + { + "countryCode": "BH", + "countryName": "Bahrain", + "continentName": "Asia" + }, + { + "countryCode": "BI", + "countryName": "Burundi", + "continentName": "Africa" + }, + { + "countryCode": "BJ", + "countryName": "Benin", + "continentName": "Africa" + }, + { + "countryCode": "BL", + "countryName": "Saint Barthélemy", + "continentName": "North America" + }, + { + "countryCode": "BM", + "countryName": "Bermuda", + "continentName": "North America" + }, + { + "countryCode": "BN", + "countryName": "Brunei", + "continentName": "Asia" + }, + { + "countryCode": "BO", + "countryName": "Bolivia", + "continentName": "South America" + }, + { + "countryCode": "BQ", + "countryName": "Bonaire", + "continentName": "North America" + }, + { + "countryCode": "BR", + "countryName": "Brazil", + "continentName": "South America" + }, + { + "countryCode": "BS", + "countryName": "Bahamas", + "continentName": "North America" + }, + { + "countryCode": "BT", + "countryName": "Bhutan", + "continentName": "Asia" + }, + { + "countryCode": "BV", + "countryName": "Bouvet Island", + "continentName": "Antarctica" + }, + { + "countryCode": "BW", + "countryName": "Botswana", + "continentName": "Africa" + }, + { + "countryCode": "BY", + "countryName": "Belarus", + "continentName": "Europe" + }, + { + "countryCode": "BZ", + "countryName": "Belize", + "continentName": "North America" + }, + { + "countryCode": "CA", + "countryName": "Canada", + "continentName": "North America" + }, + { + "countryCode": "CC", + "countryName": "Cocos [Keeling] Islands", + "continentName": "Asia" + }, + { + "countryCode": "CD", + "countryName": "Democratic Republic of the Congo", + "continentName": "Africa" + }, + { + "countryCode": "CF", + "countryName": "Central African Republic", + "continentName": "Africa" + }, + { + "countryCode": "CG", + "countryName": "Republic of the Congo", + "continentName": "Africa" + }, + { + "countryCode": "CH", + "countryName": "Switzerland", + "continentName": "Europe" + }, + { + "countryCode": "CI", + "countryName": "Ivory Coast", + "continentName": "Africa" + }, + { + "countryCode": "CK", + "countryName": "Cook Islands", + "continentName": "Oceania" + }, + { + "countryCode": "CL", + "countryName": "Chile", + "continentName": "South America" + }, + { + "countryCode": "CM", + "countryName": "Cameroon", + "continentName": "Africa" + }, + { + "countryCode": "CN", + "countryName": "China", + "continentName": "Asia" + }, + { + "countryCode": "CO", + "countryName": "Colombia", + "continentName": "South America" + }, + { + "countryCode": "CR", + "countryName": "Costa Rica", + "continentName": "North America" + }, + { + "countryCode": "CU", + "countryName": "Cuba", + "continentName": "North America" + }, + { + "countryCode": "CV", + "countryName": "Cape Verde", + "continentName": "Africa" + }, + { + "countryCode": "CW", + "countryName": "Curacao", + "continentName": "North America" + }, + { + "countryCode": "CX", + "countryName": "Christmas Island", + "continentName": "Asia" + }, + { + "countryCode": "CY", + "countryName": "Cyprus", + "continentName": "Europe" + }, + { + "countryCode": "CZ", + "countryName": "Czechia", + "continentName": "Europe" + }, + { + "countryCode": "DE", + "countryName": "Germany", + "continentName": "Europe" + }, + { + "countryCode": "DJ", + "countryName": "Djibouti", + "continentName": "Africa" + }, + { + "countryCode": "DK", + "countryName": "Denmark", + "continentName": "Europe" + }, + { + "countryCode": "DM", + "countryName": "Dominica", + "continentName": "North America" + }, + { + "countryCode": "DO", + "countryName": "Dominican Republic", + "continentName": "North America" + }, + { + "countryCode": "DZ", + "countryName": "Algeria", + "continentName": "Africa" + }, + { + "countryCode": "EC", + "countryName": "Ecuador", + "continentName": "South America" + }, + { + "countryCode": "EE", + "countryName": "Estonia", + "continentName": "Europe" + }, + { + "countryCode": "EG", + "countryName": "Egypt", + "continentName": "Africa" + }, + { + "countryCode": "EH", + "countryName": "Western Sahara", + "continentName": "Africa" + }, + { + "countryCode": "ER", + "countryName": "Eritrea", + "continentName": "Africa" + }, + { + "countryCode": "ES", + "countryName": "Spain", + "continentName": "Europe" + }, + { + "countryCode": "ET", + "countryName": "Ethiopia", + "continentName": "Africa" + }, + { + "countryCode": "FI", + "countryName": "Finland", + "continentName": "Europe" + }, + { + "countryCode": "FJ", + "countryName": "Fiji", + "continentName": "Oceania" + }, + { + "countryCode": "FK", + "countryName": "Falkland Islands", + "continentName": "South America" + }, + { + "countryCode": "FM", + "countryName": "Micronesia", + "continentName": "Oceania" + }, + { + "countryCode": "FO", + "countryName": "Faroe Islands", + "continentName": "Europe" + }, + { + "countryCode": "FR", + "countryName": "France", + "continentName": "Europe" + }, + { + "countryCode": "GA", + "countryName": "Gabon", + "continentName": "Africa" + }, + { + "countryCode": "GB", + "countryName": "United Kingdom", + "continentName": "Europe" + }, + { + "countryCode": "GD", + "countryName": "Grenada", + "continentName": "North America" + }, + { + "countryCode": "GE", + "countryName": "Georgia", + "continentName": "Asia" + }, + { + "countryCode": "GF", + "countryName": "French Guiana", + "continentName": "South America" + }, + { + "countryCode": "GG", + "countryName": "Guernsey", + "continentName": "Europe" + }, + { + "countryCode": "GH", + "countryName": "Ghana", + "continentName": "Africa" + }, + { + "countryCode": "GI", + "countryName": "Gibraltar", + "continentName": "Europe" + }, + { + "countryCode": "GL", + "countryName": "Greenland", + "continentName": "North America" + }, + { + "countryCode": "GM", + "countryName": "Gambia", + "continentName": "Africa" + }, + { + "countryCode": "GN", + "countryName": "Guinea", + "continentName": "Africa" + }, + { + "countryCode": "GP", + "countryName": "Guadeloupe", + "continentName": "North America" + }, + { + "countryCode": "GQ", + "countryName": "Equatorial Guinea", + "continentName": "Africa" + }, + { + "countryCode": "GR", + "countryName": "Greece", + "continentName": "Europe" + }, + { + "countryCode": "GS", + "countryName": "South Georgia and the South Sandwich Islands", + "continentName": "Antarctica" + }, + { + "countryCode": "GT", + "countryName": "Guatemala", + "continentName": "North America" + }, + { + "countryCode": "GU", + "countryName": "Guam", + "continentName": "Oceania" + }, + { + "countryCode": "GW", + "countryName": "Guinea-Bissau", + "continentName": "Africa" + }, + { + "countryCode": "GY", + "countryName": "Guyana", + "continentName": "South America" + }, + { + "countryCode": "HK", + "countryName": "Hong Kong", + "continentName": "Asia" + }, + { + "countryCode": "HM", + "countryName": "Heard Island and McDonald Islands", + "continentName": "Antarctica" + }, + { + "countryCode": "HN", + "countryName": "Honduras", + "continentName": "North America" + }, + { + "countryCode": "HR", + "countryName": "Croatia", + "continentName": "Europe" + }, + { + "countryCode": "HT", + "countryName": "Haiti", + "continentName": "North America" + }, + { + "countryCode": "HU", + "countryName": "Hungary", + "continentName": "Europe" + }, + { + "countryCode": "ID", + "countryName": "Indonesia", + "continentName": "Asia" + }, + { + "countryCode": "IE", + "countryName": "Ireland", + "continentName": "Europe" + }, + { + "countryCode": "IL", + "countryName": "Israel", + "continentName": "Asia" + }, + { + "countryCode": "IM", + "countryName": "Isle of Man", + "continentName": "Europe" + }, + { + "countryCode": "IN", + "countryName": "India", + "continentName": "Asia" + }, + { + "countryCode": "IO", + "countryName": "British Indian Ocean Territory", + "continentName": "Asia" + }, + { + "countryCode": "IQ", + "countryName": "Iraq", + "continentName": "Asia" + }, + { + "countryCode": "IR", + "countryName": "Iran", + "continentName": "Asia" + }, + { + "countryCode": "IS", + "countryName": "Iceland", + "continentName": "Europe" + }, + { + "countryCode": "IT", + "countryName": "Italy", + "continentName": "Europe" + }, + { + "countryCode": "JE", + "countryName": "Jersey", + "continentName": "Europe" + }, + { + "countryCode": "JM", + "countryName": "Jamaica", + "continentName": "North America" + }, + { + "countryCode": "JO", + "countryName": "Jordan", + "continentName": "Asia" + }, + { + "countryCode": "JP", + "countryName": "Japan", + "continentName": "Asia" + }, + { + "countryCode": "KE", + "countryName": "Kenya", + "continentName": "Africa" + }, + { + "countryCode": "KG", + "countryName": "Kyrgyzstan", + "continentName": "Asia" + }, + { + "countryCode": "KH", + "countryName": "Cambodia", + "continentName": "Asia" + }, + { + "countryCode": "KI", + "countryName": "Kiribati", + "continentName": "Oceania" + }, + { + "countryCode": "KM", + "countryName": "Comoros", + "continentName": "Africa" + }, + { + "countryCode": "KN", + "countryName": "Saint Kitts and Nevis", + "continentName": "North America" + }, + { + "countryCode": "KP", + "countryName": "North Korea", + "continentName": "Asia" + }, + { + "countryCode": "KR", + "countryName": "South Korea", + "continentName": "Asia" + }, + { + "countryCode": "KW", + "countryName": "Kuwait", + "continentName": "Asia" + }, + { + "countryCode": "KY", + "countryName": "Cayman Islands", + "continentName": "North America" + }, + { + "countryCode": "KZ", + "countryName": "Kazakhstan", + "continentName": "Asia" + }, + { + "countryCode": "LA", + "countryName": "Laos", + "continentName": "Asia" + }, + { + "countryCode": "LB", + "countryName": "Lebanon", + "continentName": "Asia" + }, + { + "countryCode": "LC", + "countryName": "Saint Lucia", + "continentName": "North America" + }, + { + "countryCode": "LI", + "countryName": "Liechtenstein", + "continentName": "Europe" + }, + { + "countryCode": "LK", + "countryName": "Sri Lanka", + "continentName": "Asia" + }, + { + "countryCode": "LR", + "countryName": "Liberia", + "continentName": "Africa" + }, + { + "countryCode": "LS", + "countryName": "Lesotho", + "continentName": "Africa" + }, + { + "countryCode": "LT", + "countryName": "Lithuania", + "continentName": "Europe" + }, + { + "countryCode": "LU", + "countryName": "Luxembourg", + "continentName": "Europe" + }, + { + "countryCode": "LV", + "countryName": "Latvia", + "continentName": "Europe" + }, + { + "countryCode": "LY", + "countryName": "Libya", + "continentName": "Africa" + }, + { + "countryCode": "MA", + "countryName": "Morocco", + "continentName": "Africa" + }, + { + "countryCode": "MC", + "countryName": "Monaco", + "continentName": "Europe" + }, + { + "countryCode": "MD", + "countryName": "Moldova", + "continentName": "Europe" + }, + { + "countryCode": "ME", + "countryName": "Montenegro", + "continentName": "Europe" + }, + { + "countryCode": "MF", + "countryName": "Saint Martin", + "continentName": "North America" + }, + { + "countryCode": "MG", + "countryName": "Madagascar", + "continentName": "Africa" + }, + { + "countryCode": "MH", + "countryName": "Marshall Islands", + "continentName": "Oceania" + }, + { + "countryCode": "MK", + "countryName": "Macedonia", + "continentName": "Europe" + }, + { + "countryCode": "ML", + "countryName": "Mali", + "continentName": "Africa" + }, + { + "countryCode": "MM", + "countryName": "Myanmar [Burma]", + "continentName": "Asia" + }, + { + "countryCode": "MN", + "countryName": "Mongolia", + "continentName": "Asia" + }, + { + "countryCode": "MO", + "countryName": "Macao", + "continentName": "Asia" + }, + { + "countryCode": "MP", + "countryName": "Northern Mariana Islands", + "continentName": "Oceania" + }, + { + "countryCode": "MQ", + "countryName": "Martinique", + "continentName": "North America" + }, + { + "countryCode": "MR", + "countryName": "Mauritania", + "continentName": "Africa" + }, + { + "countryCode": "MS", + "countryName": "Montserrat", + "continentName": "North America" + }, + { + "countryCode": "MT", + "countryName": "Malta", + "continentName": "Europe" + }, + { + "countryCode": "MU", + "countryName": "Mauritius", + "continentName": "Africa" + }, + { + "countryCode": "MV", + "countryName": "Maldives", + "continentName": "Asia" + }, + { + "countryCode": "MW", + "countryName": "Malawi", + "continentName": "Africa" + }, + { + "countryCode": "MX", + "countryName": "Mexico", + "continentName": "North America" + }, + { + "countryCode": "MY", + "countryName": "Malaysia", + "continentName": "Asia" + }, + { + "countryCode": "MZ", + "countryName": "Mozambique", + "continentName": "Africa" + }, + { + "countryCode": "NA", + "countryName": "Namibia", + "continentName": "Africa" + }, + { + "countryCode": "NC", + "countryName": "New Caledonia", + "continentName": "Oceania" + }, + { + "countryCode": "NE", + "countryName": "Niger", + "continentName": "Africa" + }, + { + "countryCode": "NF", + "countryName": "Norfolk Island", + "continentName": "Oceania" + }, + { + "countryCode": "NG", + "countryName": "Nigeria", + "continentName": "Africa" + }, + { + "countryCode": "NI", + "countryName": "Nicaragua", + "continentName": "North America" + }, + { + "countryCode": "NL", + "countryName": "Netherlands", + "continentName": "Europe" + }, + { + "countryCode": "NO", + "countryName": "Norway", + "continentName": "Europe" + }, + { + "countryCode": "NP", + "countryName": "Nepal", + "continentName": "Asia" + }, + { + "countryCode": "NR", + "countryName": "Nauru", + "continentName": "Oceania" + }, + { + "countryCode": "NU", + "countryName": "Niue", + "continentName": "Oceania" + }, + { + "countryCode": "NZ", + "countryName": "New Zealand", + "continentName": "Oceania" + }, + { + "countryCode": "OM", + "countryName": "Oman", + "continentName": "Asia" + }, + { + "countryCode": "PA", + "countryName": "Panama", + "continentName": "North America" + }, + { + "countryCode": "PE", + "countryName": "Peru", + "continentName": "South America" + }, + { + "countryCode": "PF", + "countryName": "French Polynesia", + "continentName": "Oceania" + }, + { + "countryCode": "PG", + "countryName": "Papua New Guinea", + "continentName": "Oceania" + }, + { + "countryCode": "PH", + "countryName": "Philippines", + "continentName": "Asia" + }, + { + "countryCode": "PK", + "countryName": "Pakistan", + "continentName": "Asia" + }, + { + "countryCode": "PL", + "countryName": "Poland", + "continentName": "Europe" + }, + { + "countryCode": "PM", + "countryName": "Saint Pierre and Miquelon", + "continentName": "North America" + }, + { + "countryCode": "PN", + "countryName": "Pitcairn Islands", + "continentName": "Oceania" + }, + { + "countryCode": "PR", + "countryName": "Puerto Rico", + "continentName": "North America" + }, + { + "countryCode": "PS", + "countryName": "Palestine", + "continentName": "Asia" + }, + { + "countryCode": "PT", + "countryName": "Portugal", + "continentName": "Europe" + }, + { + "countryCode": "PW", + "countryName": "Palau", + "continentName": "Oceania" + }, + { + "countryCode": "PY", + "countryName": "Paraguay", + "continentName": "South America" + }, + { + "countryCode": "QA", + "countryName": "Qatar", + "continentName": "Asia" + }, + { + "countryCode": "RE", + "countryName": "Réunion", + "continentName": "Africa" + }, + { + "countryCode": "RO", + "countryName": "Romania", + "continentName": "Europe" + }, + { + "countryCode": "RS", + "countryName": "Serbia", + "continentName": "Europe" + }, + { + "countryCode": "RU", + "countryName": "Russia", + "continentName": "Europe" + }, + { + "countryCode": "RW", + "countryName": "Rwanda", + "continentName": "Africa" + }, + { + "countryCode": "SA", + "countryName": "Saudi Arabia", + "continentName": "Asia" + }, + { + "countryCode": "SB", + "countryName": "Solomon Islands", + "continentName": "Oceania" + }, + { + "countryCode": "SC", + "countryName": "Seychelles", + "continentName": "Africa" + }, + { + "countryCode": "SD", + "countryName": "Sudan", + "continentName": "Africa" + }, + { + "countryCode": "SE", + "countryName": "Sweden", + "continentName": "Europe" + }, + { + "countryCode": "SG", + "countryName": "Singapore", + "continentName": "Asia" + }, + { + "countryCode": "SH", + "countryName": "Saint Helena", + "continentName": "Africa" + }, + { + "countryCode": "SI", + "countryName": "Slovenia", + "continentName": "Europe" + }, + { + "countryCode": "SJ", + "countryName": "Svalbard and Jan Mayen", + "continentName": "Europe" + }, + { + "countryCode": "SK", + "countryName": "Slovakia", + "continentName": "Europe" + }, + { + "countryCode": "SL", + "countryName": "Sierra Leone", + "continentName": "Africa" + }, + { + "countryCode": "SM", + "countryName": "San Marino", + "continentName": "Europe" + }, + { + "countryCode": "SN", + "countryName": "Senegal", + "continentName": "Africa" + }, + { + "countryCode": "SO", + "countryName": "Somalia", + "continentName": "Africa" + }, + { + "countryCode": "SR", + "countryName": "Suriname", + "continentName": "South America" + }, + { + "countryCode": "SS", + "countryName": "South Sudan", + "continentName": "Africa" + }, + { + "countryCode": "ST", + "countryName": "São Tomé and Príncipe", + "continentName": "Africa" + }, + { + "countryCode": "SV", + "countryName": "El Salvador", + "continentName": "North America" + }, + { + "countryCode": "SX", + "countryName": "Sint Maarten", + "continentName": "North America" + }, + { + "countryCode": "SY", + "countryName": "Syria", + "continentName": "Asia" + }, + { + "countryCode": "SZ", + "countryName": "Swaziland", + "continentName": "Africa" + }, + { + "countryCode": "TC", + "countryName": "Turks and Caicos Islands", + "continentName": "North America" + }, + { + "countryCode": "TD", + "countryName": "Chad", + "continentName": "Africa" + }, + { + "countryCode": "TF", + "countryName": "French Southern Territories", + "continentName": "Antarctica" + }, + { + "countryCode": "TG", + "countryName": "Togo", + "continentName": "Africa" + }, + { + "countryCode": "TH", + "countryName": "Thailand", + "continentName": "Asia" + }, + { + "countryCode": "TJ", + "countryName": "Tajikistan", + "continentName": "Asia" + }, + { + "countryCode": "TK", + "countryName": "Tokelau", + "continentName": "Oceania" + }, + { + "countryCode": "TL", + "countryName": "East Timor", + "continentName": "Oceania" + }, + { + "countryCode": "TM", + "countryName": "Turkmenistan", + "continentName": "Asia" + }, + { + "countryCode": "TN", + "countryName": "Tunisia", + "continentName": "Africa" + }, + { + "countryCode": "TO", + "countryName": "Tonga", + "continentName": "Oceania" + }, + { + "countryCode": "TR", + "countryName": "Turkey", + "continentName": "Asia" + }, + { + "countryCode": "TT", + "countryName": "Trinidad and Tobago", + "continentName": "North America" + }, + { + "countryCode": "TV", + "countryName": "Tuvalu", + "continentName": "Oceania" + }, + { + "countryCode": "TW", + "countryName": "Taiwan", + "continentName": "Asia" + }, + { + "countryCode": "TZ", + "countryName": "Tanzania", + "continentName": "Africa" + }, + { + "countryCode": "UA", + "countryName": "Ukraine", + "continentName": "Europe" + }, + { + "countryCode": "UG", + "countryName": "Uganda", + "continentName": "Africa" + }, + { + "countryCode": "UM", + "countryName": "U.S. Minor Outlying Islands", + "continentName": "Oceania" + }, + { + "countryCode": "US", + "countryName": "United States", + "continentName": "North America" + }, + { + "countryCode": "UY", + "countryName": "Uruguay", + "continentName": "South America" + }, + { + "countryCode": "UZ", + "countryName": "Uzbekistan", + "continentName": "Asia" + }, + { + "countryCode": "VA", + "countryName": "Vatican City", + "continentName": "Europe" + }, + { + "countryCode": "VC", + "countryName": "Saint Vincent and the Grenadines", + "continentName": "North America" + }, + { + "countryCode": "VE", + "countryName": "Venezuela", + "continentName": "South America" + }, + { + "countryCode": "VG", + "countryName": "British Virgin Islands", + "continentName": "North America" + }, + { + "countryCode": "VI", + "countryName": "U.S. Virgin Islands", + "continentName": "North America" + }, + { + "countryCode": "VN", + "countryName": "Vietnam", + "continentName": "Asia" + }, + { + "countryCode": "VU", + "countryName": "Vanuatu", + "continentName": "Oceania" + }, + { + "countryCode": "WF", + "countryName": "Wallis and Futuna", + "continentName": "Oceania" + }, + { + "countryCode": "WS", + "countryName": "Samoa", + "continentName": "Oceania" + }, + { + "countryCode": "XK", + "countryName": "Kosovo", + "continentName": "Europe" + }, + { + "countryCode": "YE", + "countryName": "Yemen", + "continentName": "Asia" + }, + { + "countryCode": "YT", + "countryName": "Mayotte", + "continentName": "Africa" + }, + { + "countryCode": "ZA", + "countryName": "South Africa", + "continentName": "Africa" + }, + { + "countryCode": "ZM", + "countryName": "Zambia", + "continentName": "Africa" + }, + { + "countryCode": "ZW", + "countryName": "Zimbabwe", + "continentName": "Africa" + } + ] +} diff --git a/33_sample_csv.csv b/33_sample_csv.csv new file mode 100644 index 0000000..62b23a2 --- /dev/null +++ b/33_sample_csv.csv @@ -0,0 +1,109 @@ +A2 +AE +AL +AP +AR +AT +AU +AZ +BA +BD +BE +BG +BH +BN +BR +BY +CA +CH +CL +CN +CO +CR +CW +CY +CZ +DE +DK +DO +EC +EE +ES +FI +FR +GB +GE +GG +GH +GI +GR +GT +HK +HR +HT +HU +ID +IE +IL +IN +IS +IT +JM +JO +JP +KE +KG +KR +KW +KY +KZ +LA +LB +LK +LT +LU +LV +MD +MG +MK +MO +MT +MV +MX +MY +NC +NG +NI +NL +NO +NP +NZ +OM +PA +PE +PH +PK +PL +PR +PT +PY +RO +RS +RU +SA +SE +SG +SI +SK +SO +TH +TR +TW +TZ +UA +US +UY +VN +VU +ZA +ZW diff --git a/data.csv b/data.csv new file mode 100644 index 0000000..3f0c610 --- /dev/null +++ b/data.csv @@ -0,0 +1,107 @@ +AE,United Arab Emirates,Asia +AL,Albania,Europe +AR,Argentina,South America +AT,Austria,Europe +AU,Australia,Oceania +AZ,Azerbaijan,Asia +BA,Bosnia and Herzegovina,Europe +BD,Bangladesh,Asia +BE,Belgium,Europe +BG,Bulgaria,Europe +BH,Bahrain,Asia +BN,Brunei,Asia +BR,Brazil,South America +BY,Belarus,Europe +CA,Canada,North America +CH,Switzerland,Europe +CL,Chile,South America +CN,China,Asia +CO,Colombia,South America +CR,Costa Rica,North America +CW,Curacao,North America +CY,Cyprus,Europe +CZ,Czechia,Europe +DE,Germany,Europe +DK,Denmark,Europe +DO,Dominican Republic,North America +EC,Ecuador,South America +EE,Estonia,Europe +ES,Spain,Europe +FI,Finland,Europe +FR,France,Europe +GB,United Kingdom,Europe +GE,Georgia,Asia +GG,Guernsey,Europe +GH,Ghana,Africa +GI,Gibraltar,Europe +GR,Greece,Europe +GT,Guatemala,North America +HK,Hong Kong,Asia +HR,Croatia,Europe +HT,Haiti,North America +HU,Hungary,Europe +ID,Indonesia,Asia +IE,Ireland,Europe +IL,Israel,Asia +IN,India,Asia +IS,Iceland,Europe +IT,Italy,Europe +JM,Jamaica,North America +JO,Jordan,Asia +JP,Japan,Asia +KE,Kenya,Africa +KG,Kyrgyzstan,Asia +KR,South Korea,Asia +KW,Kuwait,Asia +KY,Cayman Islands,North America +KZ,Kazakhstan,Asia +LA,Laos,Asia +LB,Lebanon,Asia +LK,Sri Lanka,Asia +LT,Lithuania,Europe +LU,Luxembourg,Europe +LV,Latvia,Europe +MD,Moldova,Europe +MG,Madagascar,Africa +MK,Macedonia,Europe +MO,Macao,Asia +MT,Malta,Europe +MV,Maldives,Asia +MX,Mexico,North America +MY,Malaysia,Asia +NC,New Caledonia,Oceania +NG,Nigeria,Africa +NI,Nicaragua,North America +NL,Netherlands,Europe +NO,Norway,Europe +NP,Nepal,Asia +NZ,New Zealand,Oceania +OM,Oman,Asia +PA,Panama,North America +PE,Peru,South America +PH,Philippines,Asia +PK,Pakistan,Asia +PL,Poland,Europe +PR,Puerto Rico,North America +PT,Portugal,Europe +PY,Paraguay,South America +RO,Romania,Europe +RS,Serbia,Europe +RU,Russia,Europe +SA,Saudi Arabia,Asia +SE,Sweden,Europe +SG,Singapore,Asia +SI,Slovenia,Europe +SK,Slovakia,Europe +SO,Somalia,Africa +TH,Thailand,Asia +TR,Turkey,Asia +TW,Taiwan,Asia +TZ,Tanzania,Africa +UA,Ukraine,Europe +US,United States,North America +UY,Uruguay,South America +VN,Vietnam,Asia +VU,Vanuatu,Oceania +ZA,South Africa,Africa +ZW,Zimbabwe,Africa From 4e54276ad46cd11a781b3525221ee719a0895a48 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sat, 17 Dec 2016 09:22:43 -0700 Subject: [PATCH 16/24] updated folder structure --- 01_remove_all_pyc.md => scripts/01_remove_all_pyc.md | 0 02_find_all_links.py => scripts/02_find_all_links.py | 0 .../03_simple_twitter_manager.py | 0 04_rename_with_slice.py => scripts/04_rename_with_slice.py | 0 .../05_load_json_without_dupes.py | 0 06_execution_time.py => scripts/06_execution_time.py | 0 .../07_benchmark_permissions_loading_django.py | 0 .../08_basic_email_web_crawler.py | 0 .../09_basic_link_web_crawler.py | 0 .../10_find_files_recursively.py | 4 ++-- .../11_optimize_images_with_wand.py | 0 12_csv_split.py => scripts/12_csv_split.py | 0 12_sample_csv.csv => scripts/12_sample_csv.csv | 0 .../13_random_name_generator.py | 0 14_html_to_markdown.sh => scripts/14_html_to_markdown.sh | 0 .../15_check_my_environment.py | 0 16_jinja_quick_load.py => scripts/16_jinja_quick_load.py | 0 .../17_rewrite_git_history.md | 0 18_zipper.py => scripts/18_zipper.py | 0 19_tsv-to-csv.py => scripts/19_tsv-to-csv.py | 0 .../20_restore_file_from_git.py | 0 21_twitter_bot.py => scripts/21_twitter_bot.py | 0 22_git_tag.py => scripts/22_git_tag.py | 0 23_flask_session_test.py => scripts/23_flask_session_test.py | 0 24_sql2csv.py => scripts/24_sql2csv.py | 0 25_ip2geolocation.py => scripts/25_ip2geolocation.py | 0 25_sample_csv.csv => scripts/25_sample_csv.csv | 0 26_stock_scraper.py => scripts/26_stock_scraper.py | 0 27_send_sms.py => scripts/27_send_sms.py | 0 .../28_income_tax_calculator.py | 0 29_json_test.json => scripts/29_json_test.json | 0 29_json_to_yaml.py => scripts/29_json_to_yaml.py | 0 30_fullcontact.py => scripts/30_fullcontact.py | 0 31_youtube_sentiment.py => scripts/31_youtube_sentiment.py | 0 32_stock_scraper.py => scripts/32_stock_scraper.py | 0 33_country_code.py => scripts/33_country_code.py | 0 33_country_codes.json => scripts/33_country_codes.json | 0 33_sample_csv.csv => scripts/33_sample_csv.csv | 0 data.csv => scripts/data.csv | 0 39 files changed, 2 insertions(+), 2 deletions(-) rename 01_remove_all_pyc.md => scripts/01_remove_all_pyc.md (100%) rename 02_find_all_links.py => scripts/02_find_all_links.py (100%) rename 03_simple_twitter_manager.py => scripts/03_simple_twitter_manager.py (100%) rename 04_rename_with_slice.py => scripts/04_rename_with_slice.py (100%) rename 05_load_json_without_dupes.py => scripts/05_load_json_without_dupes.py (100%) rename 06_execution_time.py => scripts/06_execution_time.py (100%) rename 07_benchmark_permissions_loading_django.py => scripts/07_benchmark_permissions_loading_django.py (100%) rename 08_basic_email_web_crawler.py => scripts/08_basic_email_web_crawler.py (100%) rename 09_basic_link_web_crawler.py => scripts/09_basic_link_web_crawler.py (100%) rename 10_find_files_recursively.py => scripts/10_find_files_recursively.py (92%) rename 11_optimize_images_with_wand.py => scripts/11_optimize_images_with_wand.py (100%) rename 12_csv_split.py => scripts/12_csv_split.py (100%) rename 12_sample_csv.csv => scripts/12_sample_csv.csv (100%) rename 13_random_name_generator.py => scripts/13_random_name_generator.py (100%) rename 14_html_to_markdown.sh => scripts/14_html_to_markdown.sh (100%) rename 15_check_my_environment.py => scripts/15_check_my_environment.py (100%) rename 16_jinja_quick_load.py => scripts/16_jinja_quick_load.py (100%) rename 17_rewrite_git_history.md => scripts/17_rewrite_git_history.md (100%) rename 18_zipper.py => scripts/18_zipper.py (100%) rename 19_tsv-to-csv.py => scripts/19_tsv-to-csv.py (100%) rename 20_restore_file_from_git.py => scripts/20_restore_file_from_git.py (100%) rename 21_twitter_bot.py => scripts/21_twitter_bot.py (100%) rename 22_git_tag.py => scripts/22_git_tag.py (100%) rename 23_flask_session_test.py => scripts/23_flask_session_test.py (100%) rename 24_sql2csv.py => scripts/24_sql2csv.py (100%) rename 25_ip2geolocation.py => scripts/25_ip2geolocation.py (100%) rename 25_sample_csv.csv => scripts/25_sample_csv.csv (100%) rename 26_stock_scraper.py => scripts/26_stock_scraper.py (100%) rename 27_send_sms.py => scripts/27_send_sms.py (100%) rename 28_income_tax_calculator.py => scripts/28_income_tax_calculator.py (100%) rename 29_json_test.json => scripts/29_json_test.json (100%) rename 29_json_to_yaml.py => scripts/29_json_to_yaml.py (100%) rename 30_fullcontact.py => scripts/30_fullcontact.py (100%) rename 31_youtube_sentiment.py => scripts/31_youtube_sentiment.py (100%) rename 32_stock_scraper.py => scripts/32_stock_scraper.py (100%) rename 33_country_code.py => scripts/33_country_code.py (100%) rename 33_country_codes.json => scripts/33_country_codes.json (100%) rename 33_sample_csv.csv => scripts/33_sample_csv.csv (100%) rename data.csv => scripts/data.csv (100%) diff --git a/01_remove_all_pyc.md b/scripts/01_remove_all_pyc.md similarity index 100% rename from 01_remove_all_pyc.md rename to scripts/01_remove_all_pyc.md diff --git a/02_find_all_links.py b/scripts/02_find_all_links.py similarity index 100% rename from 02_find_all_links.py rename to scripts/02_find_all_links.py diff --git a/03_simple_twitter_manager.py b/scripts/03_simple_twitter_manager.py similarity index 100% rename from 03_simple_twitter_manager.py rename to scripts/03_simple_twitter_manager.py diff --git a/04_rename_with_slice.py b/scripts/04_rename_with_slice.py similarity index 100% rename from 04_rename_with_slice.py rename to scripts/04_rename_with_slice.py diff --git a/05_load_json_without_dupes.py b/scripts/05_load_json_without_dupes.py similarity index 100% rename from 05_load_json_without_dupes.py rename to scripts/05_load_json_without_dupes.py diff --git a/06_execution_time.py b/scripts/06_execution_time.py similarity index 100% rename from 06_execution_time.py rename to scripts/06_execution_time.py diff --git a/07_benchmark_permissions_loading_django.py b/scripts/07_benchmark_permissions_loading_django.py similarity index 100% rename from 07_benchmark_permissions_loading_django.py rename to scripts/07_benchmark_permissions_loading_django.py diff --git a/08_basic_email_web_crawler.py b/scripts/08_basic_email_web_crawler.py similarity index 100% rename from 08_basic_email_web_crawler.py rename to scripts/08_basic_email_web_crawler.py diff --git a/09_basic_link_web_crawler.py b/scripts/09_basic_link_web_crawler.py similarity index 100% rename from 09_basic_link_web_crawler.py rename to scripts/09_basic_link_web_crawler.py diff --git a/10_find_files_recursively.py b/scripts/10_find_files_recursively.py similarity index 92% rename from 10_find_files_recursively.py rename to scripts/10_find_files_recursively.py index 91cd73c..0c8e1eb 100755 --- a/10_find_files_recursively.py +++ b/scripts/10_find_files_recursively.py @@ -3,7 +3,7 @@ # constants PATH = './' -PATTERN = '*.py' +PATTERN = '*.md' def get_file_names(filepath, pattern): @@ -28,4 +28,4 @@ def output_files(list_of_files): if __name__ == '__main__': - all_files = get_file_names(PATH, PATTERN) + get_file_names(PATH, PATTERN) diff --git a/11_optimize_images_with_wand.py b/scripts/11_optimize_images_with_wand.py similarity index 100% rename from 11_optimize_images_with_wand.py rename to scripts/11_optimize_images_with_wand.py diff --git a/12_csv_split.py b/scripts/12_csv_split.py similarity index 100% rename from 12_csv_split.py rename to scripts/12_csv_split.py diff --git a/12_sample_csv.csv b/scripts/12_sample_csv.csv similarity index 100% rename from 12_sample_csv.csv rename to scripts/12_sample_csv.csv diff --git a/13_random_name_generator.py b/scripts/13_random_name_generator.py similarity index 100% rename from 13_random_name_generator.py rename to scripts/13_random_name_generator.py diff --git a/14_html_to_markdown.sh b/scripts/14_html_to_markdown.sh similarity index 100% rename from 14_html_to_markdown.sh rename to scripts/14_html_to_markdown.sh diff --git a/15_check_my_environment.py b/scripts/15_check_my_environment.py similarity index 100% rename from 15_check_my_environment.py rename to scripts/15_check_my_environment.py diff --git a/16_jinja_quick_load.py b/scripts/16_jinja_quick_load.py similarity index 100% rename from 16_jinja_quick_load.py rename to scripts/16_jinja_quick_load.py diff --git a/17_rewrite_git_history.md b/scripts/17_rewrite_git_history.md similarity index 100% rename from 17_rewrite_git_history.md rename to scripts/17_rewrite_git_history.md diff --git a/18_zipper.py b/scripts/18_zipper.py similarity index 100% rename from 18_zipper.py rename to scripts/18_zipper.py diff --git a/19_tsv-to-csv.py b/scripts/19_tsv-to-csv.py similarity index 100% rename from 19_tsv-to-csv.py rename to scripts/19_tsv-to-csv.py diff --git a/20_restore_file_from_git.py b/scripts/20_restore_file_from_git.py similarity index 100% rename from 20_restore_file_from_git.py rename to scripts/20_restore_file_from_git.py diff --git a/21_twitter_bot.py b/scripts/21_twitter_bot.py similarity index 100% rename from 21_twitter_bot.py rename to scripts/21_twitter_bot.py diff --git a/22_git_tag.py b/scripts/22_git_tag.py similarity index 100% rename from 22_git_tag.py rename to scripts/22_git_tag.py diff --git a/23_flask_session_test.py b/scripts/23_flask_session_test.py similarity index 100% rename from 23_flask_session_test.py rename to scripts/23_flask_session_test.py diff --git a/24_sql2csv.py b/scripts/24_sql2csv.py similarity index 100% rename from 24_sql2csv.py rename to scripts/24_sql2csv.py diff --git a/25_ip2geolocation.py b/scripts/25_ip2geolocation.py similarity index 100% rename from 25_ip2geolocation.py rename to scripts/25_ip2geolocation.py diff --git a/25_sample_csv.csv b/scripts/25_sample_csv.csv similarity index 100% rename from 25_sample_csv.csv rename to scripts/25_sample_csv.csv diff --git a/26_stock_scraper.py b/scripts/26_stock_scraper.py similarity index 100% rename from 26_stock_scraper.py rename to scripts/26_stock_scraper.py diff --git a/27_send_sms.py b/scripts/27_send_sms.py similarity index 100% rename from 27_send_sms.py rename to scripts/27_send_sms.py diff --git a/28_income_tax_calculator.py b/scripts/28_income_tax_calculator.py similarity index 100% rename from 28_income_tax_calculator.py rename to scripts/28_income_tax_calculator.py diff --git a/29_json_test.json b/scripts/29_json_test.json similarity index 100% rename from 29_json_test.json rename to scripts/29_json_test.json diff --git a/29_json_to_yaml.py b/scripts/29_json_to_yaml.py similarity index 100% rename from 29_json_to_yaml.py rename to scripts/29_json_to_yaml.py diff --git a/30_fullcontact.py b/scripts/30_fullcontact.py similarity index 100% rename from 30_fullcontact.py rename to scripts/30_fullcontact.py diff --git a/31_youtube_sentiment.py b/scripts/31_youtube_sentiment.py similarity index 100% rename from 31_youtube_sentiment.py rename to scripts/31_youtube_sentiment.py diff --git a/32_stock_scraper.py b/scripts/32_stock_scraper.py similarity index 100% rename from 32_stock_scraper.py rename to scripts/32_stock_scraper.py diff --git a/33_country_code.py b/scripts/33_country_code.py similarity index 100% rename from 33_country_code.py rename to scripts/33_country_code.py diff --git a/33_country_codes.json b/scripts/33_country_codes.json similarity index 100% rename from 33_country_codes.json rename to scripts/33_country_codes.json diff --git a/33_sample_csv.csv b/scripts/33_sample_csv.csv similarity index 100% rename from 33_sample_csv.csv rename to scripts/33_sample_csv.csv diff --git a/data.csv b/scripts/data.csv similarity index 100% rename from data.csv rename to scripts/data.csv From 14147e3dd0de7b7262e4617a37faaf0e05825665 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sat, 17 Dec 2016 09:25:32 -0700 Subject: [PATCH 17/24] updated requirements.txt and added a todo --- TODO.md | 4 ++++ requirements.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 TODO.md diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..598d675 --- /dev/null +++ b/TODO.md @@ -0,0 +1,4 @@ +1. Write unit and integration tests for *all* scripts +1. Add Travis +1. Add support for Python 2.7, 3.5, and 3.6 +1. Organize docs and folder structure better diff --git a/requirements.txt b/requirements.txt index b65ee68..2ad222b 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ beautifulsoup4==4.4.1 PyYAML==3.11 -requests==2.7.0 +requests==2.12.4 wheel==0.24.0 From 52cea88b4f524eddfee52ccb98db0e5bbd47d455 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sat, 17 Dec 2016 09:28:23 -0700 Subject: [PATCH 18/24] added new script, updated readme, updated todo: --- TODO.md | 2 ++ readme.md | 1 + scripts/34_git_all_repos.py | 42 +++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+) create mode 100644 scripts/34_git_all_repos.py diff --git a/TODO.md b/TODO.md index 598d675..9358b94 100644 --- a/TODO.md +++ b/TODO.md @@ -2,3 +2,5 @@ 1. Add Travis 1. Add support for Python 2.7, 3.5, and 3.6 1. Organize docs and folder structure better +1. Add all scripts to single CLI for easy running, testing, and searching +1. Add License diff --git a/readme.md b/readme.md index 926be9d..59d5c64 100755 --- a/readme.md +++ b/readme.md @@ -32,3 +32,4 @@ 1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API 1. **31_youtube_sentiment.py**: Calculate sentiment score from the comments of a Youtube video 1. **32_stock_scraper.py**: Get stock prices +1. **34_git_all_repos.py**: Clone all repositories from a public user or organization on Github. Usage: `python git_all_repos.py users USER_NAME` or `python git_all_repos.py orgs ORG_NAME` diff --git a/scripts/34_git_all_repos.py b/scripts/34_git_all_repos.py new file mode 100644 index 0000000..b3e2d5b --- /dev/null +++ b/scripts/34_git_all_repos.py @@ -0,0 +1,42 @@ +import sys +import os +import requests + + +def get_total_repos(group, name): + repo_urls = [] + page = 1 + while True: + url = 'https://api.github.com/%7B0%7D/%7B1%7D/repos?per_page=100&page={2}' + r = requests.get(url.format(group, name, page)) + if r.status_code == 200: + rdata = r.json() + for repo in rdata: + repo_urls.append(repo['clone_url']) + if (len(rdata) >= 100): + page += 1 + else: + print('Found {0} repos.'.format(len(repo_urls))) + break + else: + print(r) + return False + return repo_urls + + +def clone_repos(all_repos): + count = 1 + print('Cloning...') + for repo in all_repos: + os.system('Git clone ' + repo) + print('Completed repo #{0} of {1}'.format(count, len(all_repos))) + count += 1 + +if __name__ == '__main__': + if len(sys.argv) > 2: + total = get_total_repos(sys.argv[1], sys.argv[2]) + if total: + clone_repos(total) + + else: + print('Usage: python USERS_OR_ORG GITHUB_USER_OR_ORG-NAME') From 2711f0ba1d08e28640653a5bfea1d0ca2b076ecf Mon Sep 17 00:00:00 2001 From: Skvm Date: Wed, 16 Aug 2017 19:19:48 +0100 Subject: [PATCH 19/24] Update requirements.txt 26_stock_scraper requires lxml --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2ad222b..beee86c 100755 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ beautifulsoup4==4.4.1 PyYAML==3.11 requests==2.12.4 wheel==0.24.0 +lxml==3.8.0 From 49153f1cd98ca9bd0102db41c010d76780fa7222 Mon Sep 17 00:00:00 2001 From: chetanya-shrimali Date: Sat, 16 Sep 2017 21:05:46 +0530 Subject: [PATCH 20/24] added license --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6f2e061 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Chetanya-Shrimali + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. From b060e39b6111c41e3d5cef906ffb138726875f00 Mon Sep 17 00:00:00 2001 From: Chetanya-Shrimali Date: Sat, 16 Sep 2017 21:09:03 +0530 Subject: [PATCH 21/24] Update LICENSE --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 6f2e061..2ae0d76 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017 Chetanya-Shrimali +Copyright (c) 2017 realpython Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 8eb073544ac2d5866c2f86528c03b11409eec1cd Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sat, 16 Sep 2017 09:47:51 -0600 Subject: [PATCH 22/24] Update TODO.md --- TODO.md | 1 - 1 file changed, 1 deletion(-) diff --git a/TODO.md b/TODO.md index 9358b94..9d2dfea 100644 --- a/TODO.md +++ b/TODO.md @@ -3,4 +3,3 @@ 1. Add support for Python 2.7, 3.5, and 3.6 1. Organize docs and folder structure better 1. Add all scripts to single CLI for easy running, testing, and searching -1. Add License From 5b745282a923bee43b1bd3ee972a2351637b45ae Mon Sep 17 00:00:00 2001 From: uttamo Date: Thu, 28 Sep 2017 01:02:29 +0100 Subject: [PATCH 23/24] Use random.choice instead of random.randint random.choice is better for selecting random elements from a list than using random.randint to generate random indices to select --- scripts/13_random_name_generator.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/13_random_name_generator.py b/scripts/13_random_name_generator.py index 6f0a00a..0acb9bf 100755 --- a/scripts/13_random_name_generator.py +++ b/scripts/13_random_name_generator.py @@ -1,4 +1,4 @@ -from random import randint +from random import choice def random_name_generator(first, second, x): @@ -10,13 +10,8 @@ def random_name_generator(first, second, x): - number of random names """ names = [] - for i in range(0, int(x)): - random_first = randint(0, len(first)-1) - random_last = randint(0, len(second)-1) - names.append("{0} {1}".format( - first[random_first], - second[random_last]) - ) + for i in range(x): + names.append("{0} {1}".format(choice(first), choice(second))) return set(names) From cb448c2dc3593dbfbe1ca47b49193b320115aae5 Mon Sep 17 00:00:00 2001 From: Michael Herman Date: Sun, 25 Mar 2018 08:56:49 -0600 Subject: [PATCH 24/24] updated readme --- LICENSE | 2 +- readme.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 2ae0d76..3a6fa11 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017 realpython +Copyright (c) 2018 Real Python Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/readme.md b/readme.md index 59d5c64..78f4a72 100755 --- a/readme.md +++ b/readme.md @@ -32,4 +32,5 @@ 1. **30_fullcontact.py**: Call the [FullcContact](https://www.fullcontact.com/developer/) API 1. **31_youtube_sentiment.py**: Calculate sentiment score from the comments of a Youtube video 1. **32_stock_scraper.py**: Get stock prices +1. **33_country_code.py**: Convert country code to country name 1. **34_git_all_repos.py**: Clone all repositories from a public user or organization on Github. Usage: `python git_all_repos.py users USER_NAME` or `python git_all_repos.py orgs ORG_NAME`