@@ -100,7 +100,7 @@ def _init_parameters_(self,features,labels):
100100
101101        self .n  =  len (features [0 ])
102102        self .N  =  len (features )
103-         self .M  =  10000                             # number of classifiers 
103+         self .M  =  100000                             # number of classifiers 
104104
105105        self .w  =  [1.0 / self .N ]* self .N 
106106        self .alpha  =  []
@@ -170,11 +170,34 @@ def predict(self,features):
170170        return  results 
171171
172172if  __name__  ==  '__main__' :
173-     features  =  [[0 ],[1 ],[2 ],[3 ],[4 ],[5 ],[6 ],[7 ],[8 ],[9 ]]
174-     labels  =  [1 ,1 ,1 ,- 1 ,- 1 ,- 1 ,1 ,1 ,1 ,- 1 ]
173+     print  'Start read data' 
175174
175+     time_1  =  time .time ()
176176
177+     raw_data  =  pd .read_csv ('../data/train_binary.csv' ,header = 0 )
178+     data  =  raw_data .values 
177179
180+     imgs  =  data [0 ::,1 ::]
181+     labels  =  data [::,0 ]
178182
183+ 
184+     # Use 2/3 of the data as the training set and 1/3 as the test set 
185+     train_features , test_features , train_labels , test_labels  =  train_test_split (imgs , labels , test_size = 0.33 , random_state = 23323 )
186+ 
187+     time_2  =  time .time ()
188+     print  'read data cost ' ,time_2  -  time_1 ,' second' ,'\n ' 
189+ 
190+     print  'Start training' 
179191    ada  =  AdaBoost ()
180-     ada .train (features ,labels )
192+     ada .train (train_features , train_labels )
193+ 
194+     time_3  =  time .time ()
195+     print  'training cost ' ,time_3  -  time_2 ,' second' ,'\n ' 
196+ 
197+     print  'Start predicting' 
198+     test_predict  =  ada .predict (test_features )
199+     time_4  =  time .time ()
200+     print  'predicting cost ' ,time_4  -  time_3 ,' second' ,'\n ' 
201+ 
202+     score  =  accuracy_score (test_labels ,test_predict )
203+     print  "The accruacy socre is " , score 
0 commit comments