66# It is made available under the MIT License
77
88import os
9+ import sys
910import collections
1011import csv
1112import json
@@ -57,7 +58,7 @@ def load_sanders_data(dirname=".", line_count=-1):
5758 try :
5859 tweet = json .load (open (tweet_fn , "r" ))
5960 except IOError :
60- print ("Tweet '%s' not found. Skip." % tweet_fn )
61+ print (( "Tweet '%s' not found. Skip." % tweet_fn ) )
6162 continue
6263
6364 if 'text' in tweet and tweet ['user' ]['lang' ] == "en" :
@@ -84,14 +85,14 @@ def plot_pr(auc_score, name, phase, precision, recall, label=None):
8485 pylab .title ('P/R curve (AUC=%0.2f) / %s' % (auc_score , label ))
8586 filename = name .replace (" " , "_" )
8687 pylab .savefig (os .path .join (CHART_DIR , "pr_%s_%s.png" %
87- (filename , phase )), bbox_inches = "tight" )
88+ (filename , phase )), bbox_inches = "tight" )
8889
8990
def show_most_informative_features(vectorizer, clf, n=20):
    """Print the n lowest- and n highest-weighted features side by side.

    Each feature name is paired with its coefficient from the first row of
    ``clf.coef_`` and the pairs are sorted ascending by coefficient. Output
    row i shows the i-th smallest pair on the left and the i-th largest on
    the right.

    Args:
        vectorizer: object exposing ``get_feature_names_out()`` or the older
            ``get_feature_names()``; the result must align with
            ``clf.coef_[0]``.
        clf: fitted linear model exposing ``coef_``; only row 0 is used.
        n: number of rows to print.
    """
    # scikit-learn renamed get_feature_names() to get_feature_names_out();
    # prefer the new accessor but keep working with objects that only
    # provide the old one.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names

    c_f = sorted(zip(clf.coef_[0], get_names()))

    lowest = c_f[:n]
    # Walk backwards from the end so the largest coefficient comes first.
    highest = c_f[:-(n + 1):-1]

    # zip() is consumed exactly once here, so no list() wrapper is needed.
    for (c1, f1), (c2, f2) in zip(lowest, highest):
        print("\t %.4f\t %-15s\t \t %.4f\t %-15s" % (c1, f1, c2, f2))
9596
9697
9798def plot_log ():
@@ -119,7 +120,7 @@ def plot_feat_importance(feature_names, clf, name):
119120 inds = np .argsort (coef )
120121 f_imp = f_imp [inds ]
121122 coef = coef [inds ]
122- xpos = np .array (range (len (coef )))
123+ xpos = np .array (list ( range (len (coef ) )))
123124 pylab .bar (xpos , coef , width = 1 )
124125
125126 pylab .title ('Feature importance for %s' % (name ))
@@ -181,8 +182,13 @@ def plot_bias_variance(data_sizes, train_errors, test_errors, name):
181182def load_sent_word_net ():
182183
183184 sent_scores = collections .defaultdict (list )
185+ sentiwordnet_path = os .path .join (DATA_DIR , "SentiWordNet_3.0.0_20130122.txt" )
184186
185- with open (os .path .join (DATA_DIR , "SentiWordNet_3.0.0_20130122.txt" ), "r" ) as csvfile :
187+ if not os .path .exists (sentiwordnet_path ):
188+ print ("Please download SentiWordNet_3.0.0 from http://sentiwordnet.isti.cnr.it/download.php, extract it and put it into the data directory" )
189+ sys .exit (1 )
190+
191+ with open (sentiwordnet_path , 'r' ) as csvfile :
186192 reader = csv .reader (csvfile , delimiter = '\t ' , quotechar = '"' )
187193 for line in reader :
188194 if line [0 ].startswith ("#" ):
@@ -200,7 +206,7 @@ def load_sent_word_net():
200206 term = term .replace ("-" , " " ).replace ("_" , " " )
201207 key = "%s/%s" % (POS , term .split ("#" )[0 ])
202208 sent_scores [key ].append ((float (PosScore ), float (NegScore )))
203- for key , value in sent_scores .iteritems ():
209+ for key , value in sent_scores .items ():
204210 sent_scores [key ] = np .mean (value , axis = 0 )
205211
206212 return sent_scores
0 commit comments