@@ -153,7 +153,7 @@ def transform(self, documents):
153153}
154154
155155emo_repl_order = [k for (k_len , k ) in reversed (
156- sorted ([(len (k ), k ) for k in emo_repl .keys ()]))]
156+ sorted ([(len (k ), k ) for k in list ( emo_repl .keys () )]))]
157157
158158re_repl = {
159159 r"\br\b" : "are" ,
@@ -179,7 +179,7 @@ def preprocessor(tweet):
179179
180180 for k in emo_repl_order :
181181 tweet = tweet .replace (k , emo_repl [k ])
182- for r , repl in re_repl .iteritems ():
182+ for r , repl in re_repl .items ():
183183 tweet = re .sub (r , repl , tweet )
184184
185185 return tweet .replace ("-" , " " ).replace ("_" , " " )
@@ -220,7 +220,7 @@ def __grid_search_model(clf_factory, X, Y):
220220 verbose = 10 )
221221 grid_search .fit (X , Y )
222222 clf = grid_search .best_estimator_
223- print clf
223+ print ( clf )
224224
225225 return clf
226226
@@ -275,7 +275,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
275275
276276 summary = (np .mean (scores ), np .std (scores ),
277277 np .mean (pr_scores ), np .std (pr_scores ))
278- print "%.3f\t %.3f\t %.3f\t %.3f\t " % summary
278+ print ( "%.3f\t %.3f\t %.3f\t %.3f\t " % summary )
279279
280280 return np .mean (train_errors ), np .mean (test_errors )
281281
@@ -286,9 +286,9 @@ def print_incorrect(clf, X, Y):
286286 X_wrong = X [wrong_idx ]
287287 Y_wrong = Y [wrong_idx ]
288288 Y_hat_wrong = Y_hat [wrong_idx ]
289- for idx in xrange (len (X_wrong )):
290- print "clf.predict('%s')=%i instead of %i" % \
291- (X_wrong [idx ], Y_hat_wrong [idx ], Y_wrong [idx ])
289+ for idx in range (len (X_wrong )):
290+ print ( "clf.predict('%s')=%i instead of %i" %
291+ (X_wrong [idx ], Y_hat_wrong [idx ], Y_wrong [idx ]) )
292292
293293
294294def get_best_model ():
@@ -315,35 +315,35 @@ def get_best_model():
315315 #Y_orig = Y_orig[:100,]
316316 classes = np .unique (Y_orig )
317317 for c in classes :
318- print "#%s: %i" % (c , sum (Y_orig == c ))
318+ print ( "#%s: %i" % (c , sum (Y_orig == c ) ))
319319
320- print "== Pos vs. neg =="
320+ print ( "== Pos vs. neg ==" )
321321 pos_neg = np .logical_or (Y_orig == "positive" , Y_orig == "negative" )
322322 X = X_orig [pos_neg ]
323323 Y = Y_orig [pos_neg ]
324324 Y = tweak_labels (Y , ["positive" ])
325325 train_model (get_best_model (), X , Y , name = "pos vs neg" , plot = True )
326326
327- print "== Pos/neg vs. irrelevant/neutral =="
327+ print ( "== Pos/neg vs. irrelevant/neutral ==" )
328328 X = X_orig
329329 Y = tweak_labels (Y_orig , ["positive" , "negative" ])
330330
331331 # best_clf = grid_search_model(create_union_model, X, Y, name="sent vs
332332 # rest", plot=True)
333333 train_model (get_best_model (), X , Y , name = "pos+neg vs rest" , plot = True )
334334
335- print "== Pos vs. rest =="
335+ print ( "== Pos vs. rest ==" )
336336 X = X_orig
337337 Y = tweak_labels (Y_orig , ["positive" ])
338338 train_model (get_best_model (), X , Y , name = "pos vs rest" ,
339339 plot = True )
340340
341- print "== Neg vs. rest =="
341+ print ( "== Neg vs. rest ==" )
342342 X = X_orig
343343 Y = tweak_labels (Y_orig , ["negative" ])
344344 train_model (get_best_model (), X , Y , name = "neg vs rest" ,
345345 plot = True )
346346
347- print "time spent:" , time .time () - start_time
347+ print ( "time spent:" , time .time () - start_time )
348348
349349 json .dump (poscache , open (poscache_filename , "w" ))
0 commit comments