@@ -100,7 +100,7 @@ def _init_parameters_(self,features,labels):
100100
101101 self .n = len (features [0 ])
102102 self .N = len (features )
103- self .M = 10000 # 分类器数目
103+ self .M = 100000 # 分类器数目
104104
105105 self .w = [1.0 / self .N ]* self .N
106106 self .alpha = []
@@ -170,11 +170,34 @@ def predict(self,features):
170170 return results
171171
172172if __name__ == '__main__' :
173- features = [[0 ],[1 ],[2 ],[3 ],[4 ],[5 ],[6 ],[7 ],[8 ],[9 ]]
174- labels = [1 ,1 ,1 ,- 1 ,- 1 ,- 1 ,1 ,1 ,1 ,- 1 ]
173+ print 'Start read data'
175174
175+ time_1 = time .time ()
176176
177+ raw_data = pd .read_csv ('../data/train_binary.csv' ,header = 0 )
178+ data = raw_data .values
177179
180+ imgs = data [0 ::,1 ::]
181+ labels = data [::,0 ]
178182
183+
184+ # 选取 2/3 数据作为训练集, 1/3 数据作为测试集
185+ train_features , test_features , train_labels , test_labels = train_test_split (imgs , labels , test_size = 0.33 , random_state = 23323 )
186+
187+ time_2 = time .time ()
188+ print 'read data cost ' ,time_2 - time_1 ,' second' ,'\n '
189+
190+ print 'Start training'
179191 ada = AdaBoost ()
180- ada .train (features ,labels )
192+ ada .train (train_features , train_labels )
193+
194+ time_3 = time .time ()
195+ print 'training cost ' ,time_3 - time_2 ,' second' ,'\n '
196+
197+ print 'Start predicting'
198+ test_predict = ada .predict (test_features )
199+ time_4 = time .time ()
200+ print 'predicting cost ' ,time_4 - time_3 ,' second' ,'\n '
201+
202+ score = accuracy_score (test_labels ,test_predict )
203+ print "The accruacy socre is " , score
0 commit comments