|
41 | 41 | You need to install python-twitter: |
42 | 42 | pip install python-twitter |
43 | 43 | If pip is not found you might have to install it using easy_install. |
44 | | -If it does not work on your system, you might want to follow instructions |
| 44 | +If it does not work on your system, you might want to follow instructions |
45 | 45 | at http://code.google.com/p/python-twitter/ """) |
46 | 46 |
|
47 | 47 | sys.exit(1) |
@@ -97,22 +97,25 @@ def purge_already_fetched(fetch_list, raw_dir): |
97 | 97 |
|
98 | 98 | # list of tweet ids that still need downloading |
99 | 99 | rem_list = [] |
| 100 | + count_done = 0 |
100 | 101 |
|
101 | 102 | # check each tweet to see if we have it |
102 | 103 | for item in fetch_list: |
103 | 104 |
|
104 | 105 | # check if json file exists |
105 | | - tweet_file = raw_dir + item[2] + '.json' |
| 106 | + tweet_file = os.path.join(raw_dir, item[2] + '.json') |
106 | 107 | if os.path.exists(tweet_file): |
107 | 108 |
|
108 | 109 | # attempt to parse json file |
109 | 110 | try: |
110 | 111 | parse_tweet_json(tweet_file) |
111 | 112 | print '--> already downloaded #' + item[2] |
| 113 | + count_done += 1 |
112 | 114 | except RuntimeError: |
113 | 115 | rem_list.append(item) |
114 | 116 | else: |
115 | 117 | rem_list.append(item) |
| 118 | + print "done=",count_done |
116 | 119 |
|
117 | 120 | return rem_list |
118 | 121 |
|
@@ -158,14 +161,19 @@ def download_tweets(fetch_list, raw_dir): |
158 | 161 | # New Twitter API 1.1 |
159 | 162 | try: |
160 | 163 | json_data = api.GetStatus(item[2]).AsJsonString() |
| 164 | + |
161 | 165 | except twitter.TwitterError, e: |
162 | | - fatal = False |
| 166 | + fatal = True |
163 | 167 | for m in e.message: |
164 | 168 | if m['code'] == 34: |
165 | 169 | print "Tweet missing: ",item |
166 | 170 | # [{u'message': u'Sorry, that page does not exist', u'code': 34}] |
167 | 171 | fatal = False |
168 | 172 | break |
| 173 | + elif m['code'] == 88: |
| 174 | + print "Rate limit exceeded. Please lower max_tweets_per_hr." |
| 175 | + fatal = True |
| 176 | + break |
169 | 177 |
|
170 | 178 | if fatal: |
171 | 179 | raise |
@@ -256,11 +264,13 @@ def main(data_path): |
256 | 264 |
|
257 | 265 | # get user parameters |
258 | 266 | user_params = get_user_params(data_path) |
| 267 | + print user_params |
259 | 268 | dump_user_params(user_params) |
260 | 269 |
|
261 | 270 | # get fetch list |
262 | 271 | total_list = read_total_list(user_params['inList']) |
263 | 272 | fetch_list = purge_already_fetched(total_list, user_params['rawDir']) |
| 273 | + print "Fetching %i tweets"%len(fetch_list) |
264 | 274 |
|
265 | 275 | # start fetching data from twitter |
266 | 276 | download_tweets(fetch_list, user_params['rawDir']) |
|
0 commit comments