Skip to content

Commit 0bb4c8c

Browse files
committed
Added stock scraper; converted all scripts for Python 2/3 compatibility
1 parent 5bb3679 commit 0bb4c8c

22 files changed

+129
-72
lines changed

.gitignore

+3-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
11
.pyc
22
.DS_Store
3-
_tmp
3+
_tmp
4+
env
5+
__pycache__

02_find_all_links.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,18 @@
1-
import urllib2
1+
import requests
22
import re
33

44
# get url
5-
url =raw_input('Enter a URL (include `http://`): ')
5+
url = input('Enter a URL (include `http://`): ')
66

77
# connect to the url
8-
website = urllib2.urlopen(url)
8+
website = requests.get(url)
99

1010
# read html
11-
html = website.read()
11+
html = website.text
1212

1313
# use re.findall to grab all the links
1414
links = re.findall('"((http|ftp)s?://.*?)"', html)
1515

1616
# output links
1717
for link in links:
18-
print link[0]
18+
print(link[0])

03_simple_twitter_manager.py

+11-8
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,28 @@
11
import twitter
2-
3-
2+
3+
44
TWITTER_CONSUMER_KEY = 'XXX'
55
TWITTER_CONSUMER_SECRET = 'XXX'
66
TWITTER_ACCESS_TOKEN_KEY = 'XXX'
77
TWITTER_ACCESS_TOKEN_SECRET = 'XXX'
8-
8+
99
twitter_api = twitter.Api(
1010
consumer_key=TWITTER_CONSUMER_KEY,
1111
consumer_secret=TWITTER_CONSUMER_SECRET,
1212
access_token_key=TWITTER_ACCESS_TOKEN_KEY,
1313
access_token_secret=TWITTER_ACCESS_TOKEN_SECRET
1414
)
15-
15+
1616
if __name__ == '__main__':
1717
follower_ids = twitter_api.GetFollowerIDs()
1818
following_ids = twitter_api.GetFriendIDs()
19-
zombie_follows = [following_id for following_id in following_ids if following_id not in follower_ids]
20-
21-
confirm = raw_input("Are you sure you want to unfollow %s tweeps [y|n]? " % (len(zombie_follows)))
19+
zombie_follows = [following_id for following_id in
20+
following_ids if following_id not in follower_ids]
21+
22+
confirm = raw_input(
23+
"Are you sure you want to unfollow {0} tweeps [y|n]? ".format(
24+
(len(zombie_follows))))
2225
if confirm.lower() == 'y':
2326
for id in zombie_follows:
2427
user = twitter_api.DestroyFriendship(user_id=id)
25-
print "Unfollowed %s" % (user.screen_name)
28+
print("Unfollowed {0}".format(user.screen_name))

04_rename_with_slice.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
new_file_name = file_name[:-6] + extension
99
try:
1010
os.rename(file, new_file_name)
11-
except OSError, e:
12-
print e
11+
except OSError as e:
12+
print(e)
1313
else:
14-
print "Renamed {} to {}".format(file, new_file_name)
14+
print("Renamed {} to {}".format(file, new_file_name))

05_load_json_without_dupes.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,9 @@
1-
import json
2-
31
def dict_raise_on_duplicates(ordered_pairs):
42
"""reject duplicate keys"""
53
my_dict = dict()
64
for key, values in ordered_pairs:
75
if key in my_dict:
8-
raise ValueError("Duplicate key: {}".format(key,))
6+
raise ValueError("Duplicate key: {}".format(key,))
97
else:
10-
my_dict[key] = values
11-
return my_dict
8+
my_dict[key] = values
9+
return my_dict

06_execution_time.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414

1515
import time
16+
import random
1617

1718

1819
class ExecutionTime:
@@ -25,9 +26,9 @@ def duration(self):
2526

2627
# ---- run code ---- #
2728

28-
import random
2929

3030
timer = ExecutionTime()
3131
sample_list = list()
32-
my_list = [random.randint(1, 888898) for num in xrange(1, 1000000) if num % 2 == 0]
33-
print 'Finished in {} seconds.'.format(timer.duration())
32+
my_list = [random.randint(1, 888898) for num in
33+
range(1, 1000000) if num % 2 == 0]
34+
print('Finished in {} seconds.'.format(timer.duration()))

07_benchmark_permissions_loading_django.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@ def timed(*args, **kw):
1414
te = time.time()
1515
all_times.append(te - ts)
1616

17-
print all_times
18-
print numpy.mean(all_times)
17+
print(all_times)
18+
print(numpy.mean(all_times))
1919
return result
2020

2121
return timed
@@ -39,4 +39,4 @@ def load_new_perms():
3939
while n < 10:
4040
create_new_db()
4141
load_new_perms()
42-
n += 1
42+
n += 1

08_basic_email_web_crawler.py

+9-6
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import requests
22
import re
3-
import urlparse
3+
try:
4+
from urllib.parse import urljoin
5+
except ImportError:
6+
from urlparse import urljoin
47

58
# regex
69
email_re = re.compile(r'([\w\.,]+@[\w\.,]+\.\w+)')
@@ -20,13 +23,13 @@ def crawl(url):
2023
# Find links
2124
links = link_re.findall(req.text)
2225

23-
print "\nFound {} links".format(len(links))
26+
print("\nFound {} links".format(len(links)))
2427

2528
# Search links for emails
2629
for link in links:
2730

2831
# Get an absolute URL for a link
29-
link = urlparse.urljoin(url, link)
32+
link = urljoin(url, link)
3033

3134
# Find all emails on current page
3235
result.update(email_re.findall(req.text))
@@ -36,7 +39,7 @@ def crawl(url):
3639
if __name__ == '__main__':
3740
emails = crawl('http://www.realpython.com')
3841

39-
print "\nScrapped e-mail addresses:"
42+
print("\nScrapped e-mail addresses:")
4043
for email in emails:
41-
print email
42-
print "\n"
44+
print(email)
45+
print("\n")

09_basic_link_web_crawler.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import requests
22
import re
3-
import urlparse
3+
try:
4+
from urllib.parse import urljoin
5+
except ImportError:
6+
from urlparse import urljoin
47

58
# regex
69
link_re = re.compile(r'href="(.*?)"')
@@ -17,17 +20,15 @@ def crawl(url):
1720
# Find links
1821
links = link_re.findall(req.text)
1922

20-
print "\nFound {} links".format(len(links))
23+
print("\nFound {} links".format(len(links)))
2124

2225
# Search links for emails
2326
for link in links:
2427

2528
# Get an absolute URL for a link
26-
link = urlparse.urljoin(url, link)
29+
link = urljoin(url, link)
2730

28-
print link
29-
31+
print(link)
3032

3133
if __name__ == '__main__':
3234
crawl('http://www.realpython.com')
33-

10_find_files_recursively.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import os
33

44
# constants
5-
PATH = '/../../../..'
5+
PATH = './'
66
PATTERN = '*.py'
77

88

@@ -14,18 +14,18 @@ def get_file_names(filepath, pattern):
1414
# matches.append(os.path.join(root, filename)) # full path
1515
matches.append(os.path.join(filename)) # just file name
1616
if matches:
17-
print "Found {} files:".format(len(matches))
17+
print("Found {} files:".format(len(matches)))
1818
output_files(matches)
1919
else:
20-
print "No files found."
20+
print("No files found.")
2121
else:
22-
print "Sorry that path does not exist. Try again."
22+
print("Sorry that path does not exist. Try again.")
2323

2424

2525
def output_files(list_of_files):
2626
for filename in list_of_files:
27-
print filename
27+
print(filename)
2828

2929

3030
if __name__ == '__main__':
31-
all_files = get_file_names(PATH, PATTERN)
31+
all_files = get_file_names(PATH, PATTERN)

11_optimize_images_with_wand.py

+8-7
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import fnmatch
22
import os
33

4-
# sudo pip install Wand
4+
# pip install Wand
55
from wand.image import Image
6-
# sudo pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
6+
# pip install http://pypi.python.org/packages/source/h/hurry.filesize/hurry.filesize-0.9.tar.gz
77
from hurry.filesize import size
88

99

@@ -19,12 +19,13 @@ def get_image_file_names(filepath, pattern):
1919
for filename in fnmatch.filter(filenames, pattern):
2020
matches.append(os.path.join(root, filename)) # full path
2121
if matches:
22-
print "Found {} files, with a total file size of {}.".format(len(matches), get_total_size(matches))
22+
print("Found {} files, with a total file size of {}.".format(
23+
len(matches), get_total_size(matches)))
2324
return matches
2425
else:
25-
print "No files found."
26+
print("No files found.")
2627
else:
27-
print "Sorry that path does not exist. Try again."
28+
print("Sorry that path does not exist. Try again.")
2829

2930

3031
def get_total_size(list_of_image_names):
@@ -35,15 +36,15 @@ def get_total_size(list_of_image_names):
3536

3637

3738
def resize_images(list_of_image_names):
38-
print "Optimizing ... "
39+
print("Optimizing ... ")
3940
for index, image_name in enumerate(list_of_image_names):
4041
with open(image_name) as f:
4142
image_binary = f.read()
4243
with Image(blob=image_binary) as img:
4344
if img.height >= 600:
4445
img.transform(resize='x600')
4546
img.save(filename=image_name)
46-
print "Optimization complete."
47+
print("Optimization complete.")
4748

4849

4950
if __name__ == '__main__':

12_csv_split.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -117,10 +117,10 @@ def parse_file(arguments):
117117
writer = writer.writerows(chunk)
118118

119119
# Output info
120-
print ""
121-
print "Chunk # {}:".format(current_chunk)
122-
print "Filepath: {}".format(current_output)
123-
print "# of rows: {}".format(len(chunk))
120+
print("")
121+
print("Chunk # {}:".format(current_chunk))
122+
print("Filepath: {}".format(current_output))
123+
print("# of rows: {}".format(len(chunk)))
124124

125125
# Create new chunk
126126
current_chunk += 1

13_random_name_generator.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ def random_name_generator(first, second, x):
1010
- number of random names
1111
"""
1212
names = []
13-
for i in xrange(0, int(x)):
13+
for i in range(0, int(x)):
1414
random_first = randint(0, len(first)-1)
1515
random_last = randint(0, len(second)-1)
1616
names.append("{0} {1}".format(
@@ -23,4 +23,4 @@ def random_name_generator(first, second, x):
2323
first_names = ["Drew", "Mike", "Landon", "Jeremy", "Tyler", "Tom", "Avery"]
2424
last_names = ["Smith", "Jones", "Brighton", "Taylor"]
2525
names = random_name_generator(first_names, last_names, 5)
26-
print '\n'.join(names)
26+
print('\n'.join(names))

15_check_my_environment.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ def __init__(self, configFile):
1111
pass
1212
1313
def process(self):
14-
print "ok"
14+
print("ok")
1515
1616
if __name__ == "__main__":
1717
m = Main(some_script.CONFIGFILE)
@@ -39,7 +39,7 @@ def get_config_file():
3939
if CONFIGFILE is None:
4040
sys.exit("Configuration error! Unknown environment set. \
4141
Edit config.py and set appropriate environment")
42-
print "Config file: {}".format(CONFIGFILE)
42+
print("Config file: {}".format(CONFIGFILE))
4343
if not os.path.exists(CONFIGFILE):
4444
sys.exit("Configuration error! Config file does not exist")
45-
print "Config ok ...."
45+
print("Config ok ....")

18_zipper.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
from zipfile import ZipFile
44

55

6-
#set file name and time of creation
6+
# set file name and time of creation
77
today = datetime.now()
88
file_name = 'zipper_' + today.strftime('%Y.%m.%dh%H%M') + '.zip'
99
dir_name = 'tmp/' # update path

20_restore_file_from_git.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from subprocess import check_output, call
22

33

4-
file_name = str(raw_input('Enter the file name: '))
4+
file_name = str(input('Enter the file name: '))
55
commit = check_output(["git", "rev-list", "-n", "1", "HEAD", "--", file_name])
6-
print str(commit).rstrip()
6+
print(str(commit).rstrip())
77
call(["git", "checkout", str(commit).rstrip()+"~1", file_name])
88

99

22_git_tag.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,5 @@
1010
subprocess.call(command, shell=True)
1111
subprocess.call('git push --tags', shell=True)
1212
else:
13-
print 'usage: tag.py TAG_NAME COMMIT'
13+
print('usage: tag.py TAG_NAME COMMIT')
1414
sys.exit(1)

24_sql2csv.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import sqlite3
44

55
if len(sys.argv) < 3:
6-
print "Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0])
6+
print("Use: {0} DATABASE_NAME TABLE_NAME".format(sys.argv[0]))
77
exit()
88

99
conn = sqlite3.connect(sys.argv[1])

0 commit comments

Comments
 (0)