Skip to content

Commit 6775c68

Browse files
committed
Supporting Twitter API 1.1
1 parent 6b0c827 commit 6775c68

File tree

2 files changed

+31
-42
lines changed

2 files changed

+31
-42
lines changed

ch06/install.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,23 @@
2626
# Excuse the ugly code. I threw this together as quickly as possible and I
2727
# don't normally code in Python.
2828
#
29+
30+
# Sanders' original version of this code used Twitter API 1.0.
31+
# Now that Twitter has moved to API 1.1, we had to make a few changes.
32+
# See twitterauth.py for the details.
33+
2934
import csv
30-
import getpass
3135
import json
3236
import os
3337
import time
3438
import urllib
3539

40+
from twitterauth import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET
41+
42+
import twitter
43+
api = twitter.Api(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET,
44+
access_token_key=ACCESS_TOKEN_KEY, access_token_secret=ACCESS_TOKEN_SECRET)
45+
3646

3747
def get_user_params(data_path):
3848

@@ -124,7 +134,6 @@ def download_tweets(fetch_list, raw_dir):
124134

125135
# download tweets
126136
for idx in range(0, len(fetch_list)):
127-
128137
# current item
129138
item = fetch_list[idx]
130139

@@ -133,9 +142,16 @@ def download_tweets(fetch_list, raw_dir):
133142
print '--> downloading tweet #%s (%d of %d) (%s left)' % \
134143
(item[2], idx + 1, len(fetch_list), trem)
135144

145+
# Old Twitter API 1.0
136146
# pull data
137-
url = 'http://api.twitter.com/1/statuses/show.json?id=' + item[2]
138-
urllib.urlretrieve(url, raw_dir + item[2] + '.json')
147+
# url = 'https://api.twitter.com/1/statuses/show.json?id=' + item[2]
148+
# print url
149+
# urllib.urlretrieve(url, raw_dir + item[2] + '.json')
150+
151+
# New Twitter API 1.1
152+
json_data = api.GetStatus(item[2]).AsJsonString()
153+
with open(raw_dir + item[2] + '.json', "w") as f:
154+
f.write(json_data + "\n")
139155

140156
# stay in Twitter API rate limits
141157
print ' pausing %d sec to obey Twitter API rate limits' % \
@@ -236,8 +252,6 @@ def main(data_path):
236252
build_output_corpus(user_params['outList'], user_params['rawDir'],
237253
total_list)
238254

239-
return
240-
241255

242256
if __name__ == '__main__':
243-
main(os.path.join("..", "data"))
257+
main("data")

ch06/utils.py

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,21 @@
77

88
import os
99
import collections
10+
import csv
11+
import json
1012

1113
from matplotlib import pylab
1214
import numpy as np
1315

14-
DATA_DIR = os.path.join("..", "data")
15-
CHART_DIR = os.path.join("..", "charts")
1616

17-
import csv
18-
import json
17+
DATA_DIR = "data"
18+
CHART_DIR = "charts"
19+
20+
if not os.path.exists(DATA_DIR):
21+
raise RuntimeError("Expecting directory 'data' in current path")
22+
23+
if not os.path.exists(CHART_DIR):
24+
os.mkdir(CHART_DIR)
1925

2026

2127
def tweak_labels(Y, pos_sent_list):
@@ -58,40 +64,9 @@ def load_sanders_data(dirname=".", line_count=-1):
5864
tweets = np.asarray(tweets)
5965
labels = np.asarray(labels)
6066

61-
# return topics, tweets, labels
6267
return tweets, labels
6368

6469

65-
def load_kaggle_data(filename="kaggle/training.txt", line_count=-1):
66-
count = 0
67-
68-
labels = []
69-
texts = []
70-
71-
read_texts = set([])
72-
73-
for line in open(os.path.join(DATA_DIR, filename), "r"):
74-
count += 1
75-
if line_count > 0 and count > line_count:
76-
break
77-
78-
label, text = line.split("\t")
79-
80-
# Some tweets occur multiple times, so we have to
81-
# remove them to not bias the training set.
82-
if text in read_texts:
83-
continue
84-
read_texts.add(text)
85-
86-
labels.append(label)
87-
texts.append(text)
88-
89-
texts = np.asarray(texts)
90-
labels = np.asarray(labels, dtype=np.int)
91-
92-
return texts, labels
93-
94-
9570
def plot_pr(auc_score, name, phase, precision, recall, label=None):
9671
pylab.clf()
9772
pylab.figure(num=None, figsize=(5, 4))

0 commit comments

Comments
 (0)