33# @Date: 15-11-16
4455# @Last modified by: wendesi
6- # @Last modified time: 15 -11-16
6+ # @Last modified time: 17 -11-16
77
88import cv2
99import time
1818sign_time_count = 0
1919
2020class Sign (object ):
21+ '''
22+ 阈值分类器
23+
24+ 有两种方向,
25+ 1)x<v y=1
26+ 2) x>v y=1
27+ v 是阈值轴
28+
29+ 因为是针对已经二值化后的MNIST数据集,所以v的取值只有3个 {0,1,2}
30+ '''
31+
def __init__(self, features, labels, w):
    """Set up a threshold classifier over one feature of the training set.

    features: training feature values
    labels:   training labels; their count is recorded as the data size
    w:        weight distribution over the training samples
    """
    # Training data: feature values and the matching labels.
    self.X, self.Y = features, labels

    # Size of the training data.
    self.N = len(labels)

    # Weight distribution over the training data.
    self.w = w

    # Candidate positions for the threshold axis.
    self.indexes = [0, 1, 2]
2940
3041 def _train_less_than_ (self ):
42+ '''
43+ 寻找(x<v y=1)情况下的最优v
44+ '''
45+
3146 index = - 1
3247 error_score = 1000000
3348
@@ -50,6 +65,10 @@ def _train_less_than_(self):
5065
5166
5267 def _train_more_than_ (self ):
68+ '''
69+ 寻找(x>v y=1)情况下的最优v
70+ '''
71+
5372 index = - 1
5473 error_score = 1000000
5574
@@ -106,21 +125,29 @@ def __init__(self):
106125 pass
107126
def _init_parameters_(self, features, labels):
    """Reset all training state for a new training set.

    Args:
        features: sequence of samples; each sample is a sequence of
            feature values and the first one fixes the feature dimension.
        labels: sequence of labels, one per sample.

    Raises:
        ValueError: if ``features`` is empty, or if ``features`` and
            ``labels`` differ in length.
    """
    sample_count = len(features)

    # Fail early with a clear message instead of an IndexError on
    # features[0] or an out-of-range label lookup later on.
    if sample_count == 0:
        raise ValueError("features must contain at least one sample")
    if len(labels) != sample_count:
        raise ValueError(
            "features and labels must have the same length (got %d and %d)"
            % (sample_count, len(labels))
        )

    self.X = features  # training-set features
    self.Y = labels    # training-set labels

    self.n = len(features[0])  # feature dimension
    self.N = sample_count      # training-set size
    self.M = 10                # number of classifiers

    self.w = [1.0 / self.N] * self.N  # weight distribution, initially uniform
    self.alpha = []                   # classifier coefficients (formula 8.2)
    self.classifier = []              # (dimension, classifier) pairs; each classifier targets that dimension
119138
def _w_(self, index, classifier, i):
    """Return the updated, not yet normalised weight of sample ``i``.

    This is formula 8.4 without the division by Zm.

    index:      feature position passed to the classifier
    classifier: object whose ``predict`` is applied to ``self.X[i][index]``
    i:          position of the sample in the training set
    """
    prediction = classifier.predict(self.X[i][index])

    # Uses the most recently appended classifier coefficient.
    exponent = -self.alpha[-1] * self.Y[i] * prediction

    return self.w[i] * math.exp(exponent)
122145
123146 def _Z_ (self ,index ,classifier ):
147+ '''
148+ 公式8.5
149+ '''
150+
124151 Z = 0
125152
126153 for i in xrange (self .N ):
@@ -138,7 +165,7 @@ def train(self,features,labels):
138165 time1 = time .time ()
139166 map_time = 0
140167
141- best_classifier = (100000 ,None ,None ) #(误差率,分类器, 针对的特征)
168+ best_classifier = (100000 ,None ,None ) #(误差率,针对的特征,分类器 )
142169 for i in xrange (self .n ):
143170 map_time -= time .time ()
144171 features = map (lambda x :x [i ],self .X )
@@ -168,6 +195,7 @@ def train(self,features,labels):
168195
169196 Z = self ._Z_ (best_classifier [1 ],best_classifier [2 ])
170197
198+ # 计算训练集权值分布 8.4
171199 for i in xrange (self .N ):
172200 self .w [i ] = self ._w_ (best_classifier [1 ],best_classifier [2 ],i )/ Z
173201
0 commit comments