
Commit b0d244a

Rewrite the code using Python 3

Rewrite the code using Python 3, and add some comments.
1 parent 41ae6fa commit b0d244a

File tree

15 files changed, +135 -123 lines
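
The same handful of Python 2 to Python 3 conversions recur through the diffs below; a compact, illustrative summary of the patterns this commit applies (not itself one of the changed files):

    # print statement -> print() function
    print('Start read data')

    # xrange() is gone in Python 3; range() is the lazy equivalent
    for i in range(10):
        pass

    # map()/filter() now return iterators; wrap in list() wherever a list is expected
    train_labels = list(map(lambda x: 2 * x - 1, [0, 1, 1, 0]))

    # renamed library entry points touched by this commit:
    #   sklearn.cross_validation.train_test_split  ->  sklearn.model_selection.train_test_split
    #   cv2.cv.CV_THRESH_BINARY_INV                 ->  cv2.THRESH_BINARY_INV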

AdaBoost/adaboost.py

Lines changed: 28 additions & 20 deletions

@@ -12,7 +12,7 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 sign_time_count = 0
@@ -48,7 +48,7 @@ def _train_less_than_(self):
 
         for i in self.indexes:
             score = 0
-            for j in xrange(self.N):
+            for j in range(self.N):
                 val = -1
                 if self.X[j]<i:
                     val = 1
@@ -74,7 +74,7 @@ def _train_more_than_(self):
 
         for i in self.indexes:
             score = 0
-            for j in xrange(self.N):
+            for j in range(self.N):
                 val = 1
                 if self.X[j]<i:
                     val = -1
@@ -150,7 +150,7 @@ def _Z_(self,index,classifier):
 
         Z = 0
 
-        for i in xrange(self.N):
+        for i in range(self.N):
             Z += self._w_(index,classifier,i)
 
         return Z
@@ -159,16 +159,16 @@ def train(self,features,labels):
 
         self._init_parameters_(features,labels)
 
-        for times in xrange(self.M):
+        for times in range(self.M):
             logging.debug('iterater %d' % times)
 
             time1 = time.time()
             map_time = 0
 
             best_classifier = (100000,None,None)  # (error rate, feature index, classifier)
-            for i in xrange(self.n):
+            for i in range(self.n):
                 map_time -= time.time()
-                features = map(lambda x:x[i],self.X)
+                features = list(map(lambda x:x[i],self.X))
                 map_time += time.time()
                 classifier = Sign(features,self.Y,self.w)
                 error_score = classifier.train()
@@ -179,10 +179,10 @@ def train(self,features,labels):
             em = best_classifier[0]
 
             # for profiling only, remove later -- start
-            print 'em is %s, index is %d' % (str(em),best_classifier[1])
+            print('em is %s, index is %d' % (str(em),best_classifier[1]))
             time2 = time.time()
             global sign_time_count
-            print 'total runtime: %s, time in the two hot code paths: %s, time spent in map: %s' % (str(time2-time1),str(sign_time_count),str(map_time))
+            print('total runtime: %s, time in the two hot code paths: %s, time spent in map: %s' % (str(time2-time1),str(sign_time_count),str(map_time)))
             sign_time_count = 0
             # for profiling only, remove later -- end
 
@@ -196,13 +196,13 @@ def train(self,features,labels):
             Z = self._Z_(best_classifier[1],best_classifier[2])
 
             # update the training-set weight distribution (formula 8.4)
-            for i in xrange(self.N):
+            for i in range(self.N):
                 self.w[i] = self._w_(best_classifier[1],best_classifier[2],i)/Z
 
     def _predict_(self,feature):
 
         result = 0.0
-        for i in xrange(self.M):
+        for i in range(self.M):
             index = self.classifier[i][0]
             classifier = self.classifier[i][1]
 
@@ -225,7 +225,7 @@ def predict(self,features):
 # binarization
 def binaryzation(img):
     cv_img = img.astype(np.uint8)
-    cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img)
+    cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img)
     return cv_img
 
 def binaryzation_features(trainset):
@@ -248,7 +248,7 @@ def binaryzation_features(trainset):
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -264,21 +264,29 @@ def binaryzation_features(trainset):
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.5, random_state=0)
 
    time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
-    train_labels = map(lambda x:2*x-1,train_labels)
+    print('Start training')
+    '''
+    In Python 3, map returns an iterator. If your function expects a list,
+    the iterator has to be explicitly converted, like this: data = list(map(...))
+    "'XXX' object is not subscriptable" means the object cannot be indexed;
+    indexable objects include list, tuple, and so on.
+    If you are sure the object is iterable, you can convert it to a list
+    with list() and then read its elements by index.
+    '''
+    train_labels = list(map(lambda x:2*x-1,train_labels))
    ada = AdaBoost()
    ada.train(train_features, train_labels)
 
    time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = ada.predict(test_features)
    time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
    test_labels = map(lambda x:2*x-1,test_labels)
    score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)

AdaBoost/adaboost_cpp.py

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 sign_time_count = 0

decision_tree/decision_tree.py

Lines changed: 5 additions & 5 deletions

@@ -7,7 +7,7 @@
 import pandas as pd
 
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -29,7 +29,7 @@ def wrapper(*args, **kwargs):
 # binarization
 def binaryzation(img):
     cv_img = img.astype(np.uint8)
-    cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img)
+    cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img)
     return cv_img
 
 @log
@@ -119,7 +119,7 @@ def recurse_train(train_set,train_label,features,epsilon):
         return Tree(LEAF,Class = label_set.pop())
 
     # step 2 -- if features is empty
-    (max_class,max_len) = max([(i,len(filter(lambda x:x==i,train_label))) for i in xrange(total_class)],key = lambda x:x[1])
+    (max_class,max_len) = max([(i,len(list(filter(lambda x:x==i,train_label)))) for i in range(total_class)],key = lambda x:x[1])
 
     if len(features) == 0:
         return Tree(LEAF,Class = max_class)
@@ -150,7 +150,7 @@ def recurse_train(train_set,train_label,features,epsilon):
     for feature_value in feature_value_list:
 
         index = []
-        for i in xrange(len(train_label)):
+        for i in range(len(train_label)):
             if train_set[i][max_feature] == feature_value:
                 index.append(i)
 
@@ -197,7 +197,7 @@ def predict(test_set,tree):
    test_predict = predict(test_features,tree)
    score = accuracy_score(test_labels,test_predict)
 
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)
 
 
 
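
As rendered on the page, the rewritten (max_class,max_len) line had its parentheses misplaced: list(filter(...)) must be closed before the for clause, otherwise the expression is a SyntaxError; the + line above includes that fix. The same majority-class computation can also be written more directly with collections.Counter; a hypothetical refactor, not part of the commit:

    from collections import Counter

    def majority_class(train_label):
        # Most frequent label and its count -- equivalent to the max()/filter()
        # expression in recurse_train for a non-empty label list (tie-breaking may differ).
        max_class, max_len = Counter(train_label).most_common(1)[0]
        return max_class, max_len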

knn/distance_test.py

Lines changed: 4 additions & 3 deletions

@@ -13,12 +13,13 @@
 
 time_1 = time.time()
 
-print np.sqrt(np.sum(np.square(vec_1 - vec_2)))
 
+print(np.sqrt(np.sum(np.square(vec_1 - vec_2))))
+### core linear algebra tools: norm, inv, solve, det, lstsq, pinv, matrix_power
 time_2 = time.time()
-print time_2-time_1
+print(time_2-time_1)
 
 print np.linalg.norm(vec_1 - vec_2)
 
 time_3 = time.time()
-print time_3-time_2
+print(time_3-time_2)
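
The unchanged context line print np.linalg.norm(vec_1 - vec_2) is still Python 2 syntax, so the script will not run under Python 3 until that line is converted as well. The two timed expressions compute the same Euclidean distance; a small self-contained comparison, assuming vec_1 and vec_2 are NumPy arrays as in the script:

    import numpy as np

    vec_1 = np.random.rand(784)
    vec_2 = np.random.rand(784)

    # Both compute the Euclidean (L2) distance between the two vectors.
    d1 = np.sqrt(np.sum(np.square(vec_1 - vec_2)))
    d2 = np.linalg.norm(vec_1 - vec_2)   # ord defaults to the 2-norm for vectors
    print(d1, d2)                        # agree up to floating-point rounding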

knn/knn.py

Lines changed: 10 additions & 10 deletions

@@ -6,7 +6,7 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -35,7 +35,7 @@ def Predict(testset,trainset,train_labels):
 
    for test_vec in testset:
        # print the index of the test sample currently being processed, for debugging
-        print count
+        print(count)
        count += 1
 
        knn_list = []   # the current k nearest neighbours
@@ -93,7 +93,7 @@ def Predict(testset,trainset,train_labels):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -111,17 +111,17 @@ def Predict(testset,trainset,train_labels):
    # print train_features.shape
 
    time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
-    print 'knn do not need to train'
+    print('Start training')
+    print('knn do not need to train')
    time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = Predict(test_features,train_features,train_labels)
    time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
    score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)
Binary file not shown.

logistic_regression/binary_perceptron.py

Lines changed: 11 additions & 11 deletions

@@ -12,7 +12,7 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -23,7 +23,7 @@ def __init__(self):
        self.max_iteration = 5000
 
    def predict_(self, x):
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
        return int(wx > 0)
 
    def train(self, features, labels):
@@ -37,15 +37,15 @@ def train(self, features, labels):
            x = list(features[index])
            x.append(1.0)
            y = 2 * labels[index] - 1
-            wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+            wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
 
            if wx * y > 0:
                correct_count += 1
                if correct_count > self.max_iteration:
                    break
                continue
 
-            for i in xrange(len(self.w)):
+            for i in range(len(self.w)):
                self.w[i] += self.learning_step * (y * x[i])
 
    def predict(self,features):
@@ -59,7 +59,7 @@ def predict(self,features):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -76,19 +76,19 @@ def predict(self,features):
    # print train_features.shape
 
    time_2 = time.time()
-    print 'read data cost ', time_2 - time_1, ' second', '\n'
+    print('read data cost ', time_2 - time_1, ' second', '\n')
 
-    print 'Start training'
+    print('Start training')
    p = Perceptron()
    p.train(train_features, train_labels)
 
    time_3 = time.time()
-    print 'training cost ', time_3 - time_2, ' second', '\n'
+    print('training cost ', time_3 - time_2, ' second', '\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = p.predict(test_features)
    time_4 = time.time()
-    print 'predicting cost ', time_4 - time_3, ' second', '\n'
+    print('predicting cost ', time_4 - time_3, ' second', '\n')
 
    score = accuracy_score(test_labels, test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)

logistic_regression/competation.py

Lines changed: 5 additions & 5 deletions

@@ -11,7 +11,7 @@
 from binary_perceptron import Perceptron
 from logistic_regression import LogisticRegression
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 if __name__ == '__main__':
@@ -29,8 +29,8 @@
 
    writer = csv.writer(file('result.csv', 'wb'))
 
-    for time in xrange(test_time):
-        print 'iterater time %d' % time
+    for time in range(test_time):
+        print('iterater time %d' % time)
 
        train_features, test_features, train_labels, test_labels = train_test_split(
            imgs, labels, test_size=0.33, random_state=23323)
@@ -44,7 +44,7 @@
        p_score = accuracy_score(test_labels, p_predict)
        lr_score = accuracy_score(test_labels, lr_predict)
 
-        print 'perceptron accruacy score ', p_score
-        print 'logistic Regression accruacy score ', lr_score
+        print('perceptron accruacy score ', p_score)
+        print('logistic Regression accruacy score ', lr_score)
 
        writer.writerow([time,p_score,lr_score])
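
The context line writer = csv.writer(file('result.csv', 'wb')) is not touched by this commit, but it is Python 2 only: the file() builtin was removed in Python 3, and the csv module expects a text-mode handle. A sketch of the likely Python 3 equivalent (an assumption, not part of the commit; the column names are illustrative):

    import csv

    # open() replaces file(); text mode with newline='' lets the csv module
    # manage line endings itself.
    with open('result.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['iteration', 'perceptron_score', 'logistic_regression_score'])

In the script itself the with block would need to wrap the whole test loop so writer.writerow([time, p_score, lr_score]) stays inside it.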
