1212import numpy as np
1313import pandas as pd
1414
15- from sklearn .cross_validation import train_test_split
15+ from sklearn .model_selection import train_test_split
1616from sklearn .metrics import accuracy_score
1717
1818sign_time_count = 0
@@ -48,7 +48,7 @@ def _train_less_than_(self):
4848
4949 for i in self .indexes :
5050 score = 0
51- for j in xrange (self .N ):
51+ for j in range (self .N ):
5252 val = - 1
5353 if self .X [j ]< i :
5454 val = 1
@@ -74,7 +74,7 @@ def _train_more_than_(self):
7474
7575 for i in self .indexes :
7676 score = 0
77- for j in xrange (self .N ):
77+ for j in range (self .N ):
7878 val = 1
7979 if self .X [j ]< i :
8080 val = - 1
@@ -150,7 +150,7 @@ def _Z_(self,index,classifier):
150150
151151 Z = 0
152152
153- for i in xrange (self .N ):
153+ for i in range (self .N ):
154154 Z += self ._w_ (index ,classifier ,i )
155155
156156 return Z
@@ -159,16 +159,16 @@ def train(self,features,labels):
159159
160160 self ._init_parameters_ (features ,labels )
161161
162- for times in xrange (self .M ):
162+ for times in range (self .M ):
163163 logging .debug ('iterater %d' % times )
164164
165165 time1 = time .time ()
166166 map_time = 0
167167
168168 best_classifier = (100000 ,None ,None ) #(误差率,针对的特征,分类器)
169- for i in xrange (self .n ):
169+ for i in range (self .n ):
170170 map_time -= time .time ()
171- features = map (lambda x :x [i ],self .X )
171+ features = list ( map (lambda x :x [i ],self .X ) )
172172 map_time += time .time ()
173173 classifier = Sign (features ,self .Y ,self .w )
174174 error_score = classifier .train ()
@@ -179,10 +179,10 @@ def train(self,features,labels):
179179 em = best_classifier [0 ]
180180
181181 # 分析用,之后删除 开始
182- print 'em is %s, index is %d' % (str (em ),best_classifier [1 ])
182+ print ( 'em is %s, index is %d' % (str (em ),best_classifier [1 ]) )
183183 time2 = time .time ()
184184 global sign_time_count
185- print '总运行时间:%s, 那两段关键代码运行时间:%s, map的时间是:%s' % (str (time2 - time1 ),str (sign_time_count ),str (map_time ))
185+ print ( '总运行时间:%s, 那两段关键代码运行时间:%s, map的时间是:%s' % (str (time2 - time1 ),str (sign_time_count ),str (map_time ) ))
186186 sign_time_count = 0
187187 # 分析用,之后删除 结束
188188
@@ -196,13 +196,13 @@ def train(self,features,labels):
196196 Z = self ._Z_ (best_classifier [1 ],best_classifier [2 ])
197197
198198 # 计算训练集权值分布 8.4
199- for i in xrange (self .N ):
199+ for i in range (self .N ):
200200 self .w [i ] = self ._w_ (best_classifier [1 ],best_classifier [2 ],i )/ Z
201201
202202 def _predict_ (self ,feature ):
203203
204204 result = 0.0
205- for i in xrange (self .M ):
205+ for i in range (self .M ):
206206 index = self .classifier [i ][0 ]
207207 classifier = self .classifier [i ][1 ]
208208
@@ -225,7 +225,7 @@ def predict(self,features):
225225# 二值化
226226def binaryzation (img ):
227227 cv_img = img .astype (np .uint8 )
228- cv2 .threshold (cv_img ,50 ,1 ,cv2 .cv . CV_THRESH_BINARY_INV ,cv_img )
228+ cv2 .threshold (cv_img ,50 ,1 ,cv2 .THRESH_BINARY_INV ,cv_img )
229229 return cv_img
230230
231231def binaryzation_features (trainset ):
@@ -248,7 +248,7 @@ def binaryzation_features(trainset):
248248 logger = logging .getLogger ()
249249 logger .setLevel (logging .DEBUG )
250250
251- print 'Start read data'
251+ print ( 'Start read data' )
252252
253253 time_1 = time .time ()
254254
@@ -264,21 +264,29 @@ def binaryzation_features(trainset):
264264 train_features , test_features , train_labels , test_labels = train_test_split (features , labels , test_size = 0.5 , random_state = 0 )
265265
266266 time_2 = time .time ()
267- print 'read data cost ' ,time_2 - time_1 ,' second' ,'\n '
267+ print ( 'read data cost ' ,time_2 - time_1 ,' second' ,'\n ' )
268268
269- print 'Start training'
270- train_labels = map (lambda x :2 * x - 1 ,train_labels )
269+ print ('Start training' )
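270+ '''
271+ In Python 3, map() returns a lazy iterator instead of a list. If the code that
272+ consumes the result expects a list, convert it explicitly: data = list(map(...)).
273+ Indexing the iterator directly raises "TypeError: 'map' object is not subscriptable".
274+ That error means the object does not support indexing; indexable objects
275+ include list, tuple, and similar sequence types.
276+ If the object is known to be iterable, pass it to list() first and then
277+ read its elements by index.
278+ '''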
278+ train_labels = list (map (lambda x :2 * x - 1 ,train_labels ))
271279 ada = AdaBoost ()
272280 ada .train (train_features , train_labels )
273281
274282 time_3 = time .time ()
275- print 'training cost ' ,time_3 - time_2 ,' second' ,'\n '
283+ print ( 'training cost ' ,time_3 - time_2 ,' second' ,'\n ' )
276284
277- print 'Start predicting'
285+ print ( 'Start predicting' )
278286 test_predict = ada .predict (test_features )
279287 time_4 = time .time ()
280- print 'predicting cost ' ,time_4 - time_3 ,' second' ,'\n '
288+ print ( 'predicting cost ' ,time_4 - time_3 ,' second' ,'\n ' )
281289
282290 test_labels = map (lambda x :2 * x - 1 ,test_labels )
283291 score = accuracy_score (test_labels ,test_predict )
284- print "The accruacy socre is " , score
292+ print ( "The accruacy socre is " , score )
0 commit comments