33# @Date:   15-11-16 
4455# @Last modified by:   wendesi 
6- # @Last modified time: 15 -11-16 
6+ # @Last modified time: 17 -11-16 
77
88import  cv2 
99import  time 
1818sign_time_count  =  0  # NOTE(review): module-level counter initialised to 0; its use is not visible in this excerpt -- presumably accumulates time spent in Sign, confirm
1919
2020class  Sign (object ):
21+     ''' 
22+     阈值分类器 
23+ 
24+     有两种方向, 
25+         1)x<v y=1 
26+         2) x>v y=1 
27+         v 是阈值轴 
28+ 
29+     因为是针对已经二值化后的MNIST数据集,所以v的取值只有3个 {0,1,2} 
30+     ''' 
31+ 
2132    def  __init__ (self ,features ,labels ,w ):
22-         self .X  =  features 
23-         self .Y  =  labels 
24-         self .N  =  len (labels )
33+         self .X  =  features                 # feature values of the training data
34+         self .Y  =  labels                   # labels of the training data
35+         self .N  =  len (labels )             # number of training samples
2536
26-         self .w  =  w 
37+         self .w  =  w                        # weight distribution over the training data
2738
28-         self .indexes  =  [0 ,1 ,2 ]
39+         self .indexes  =  [0 ,1 ,2 ]           # candidate values for the threshold axis v
2940
3041    def  _train_less_than_ (self ):
42+         ''' 
43+         寻找(x<v y=1)情况下的最优v 
44+         ''' 
45+ 
3146        index  =  - 1 
3247        error_score  =  1000000 
3348
@@ -50,6 +65,10 @@ def _train_less_than_(self):
5065
5166
5267    def  _train_more_than_ (self ):
68+         ''' 
69+         寻找(x>v y=1)情况下的最优v 
70+         ''' 
71+ 
5372        index  =  - 1 
5473        error_score  =  1000000 
5574
@@ -106,21 +125,29 @@ def __init__(self):
106125        pass 
107126
108127    def  _init_parameters_ (self ,features ,labels ):
109-         self .X  =  features 
110-         self .Y  =  labels 
128+         self .X  =  features                             # training-set features
129+         self .Y  =  labels                               # training-set labels
111130
112-         self .n  =  len (features [0 ])
113-         self .N  =  len (features )
114-         self .M  =  60                             # number of classifiers
131+         self .n  =  len (features [0 ])                    # feature dimension (length of the first sample)
132+         self .N  =  len (features )                       # training-set size
133+         self .M  =  10                                   # number of classifiers
115134
116-         self .w  =  [1.0 / self .N ]* self .N 
117-         self .alpha  =  []
118-         self .classifier  =  []
135+         self .w  =  [1.0 / self .N ]* self .N                  # weight distribution over the training set, initially uniform (1/N each)
136+         self .alpha  =  []                              # classifier coefficients (formula 8.2)
137+         self .classifier  =  []                         # (dimension, classifier) entries: each classifier targets that feature dimension
119138
120139    def  _w_ (self ,index ,classifier ,i ):
140+         ''' 
141+         Formula 8.4 without the Zm factor: the updated, not yet normalized, weight of sample i. 
142+         ''' 
143+ 
121144        return  self .w [i ]* math .exp (- self .alpha [- 1 ]* self .Y [i ]* classifier .predict (self .X [i ][index ]))
122145
123146    def  _Z_ (self ,index ,classifier ):
147+         ''' 
148+         公式8.5 
149+         ''' 
150+ 
124151        Z  =  0 
125152
126153        for  i  in  xrange (self .N ):
@@ -138,7 +165,7 @@ def train(self,features,labels):
138165            time1  =  time .time ()
139166            map_time  =  0 
140167
141-             best_classifier  =  (100000 ,None ,None )        #(误差率,分类器, 针对的特征) 
168+             best_classifier  =  (100000 ,None ,None )        #(误差率,针对的特征,分类器 ) 
142169            for  i  in  xrange (self .n ):
143170                map_time  -=  time .time ()
144171                features  =  map (lambda  x :x [i ],self .X )
@@ -168,6 +195,7 @@ def train(self,features,labels):
168195
169196            Z  =  self ._Z_ (best_classifier [1 ],best_classifier [2 ])
170197
198+             # 计算训练集权值分布 8.4 
171199            for  i  in  xrange (self .N ):
172200                self .w [i ] =  self ._w_ (best_classifier [1 ],best_classifier [2 ],i )/ Z 
173201
0 commit comments