
Commit b0d244a

Rewrite the code using Python 3

Rewrite the code using Python 3, and add some comments.
1 parent 41ae6fa commit b0d244a

File tree

15 files changed, +135 -123 lines
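
The same handful of Python 2 to Python 3 conversions recur through the diffs below; a compact, illustrative summary of the patterns this commit applies (not itself one of the changed files):

    # print statement -> print() function
    print('Start read data')

    # xrange() is gone in Python 3; range() is the lazy equivalent
    for i in range(10):
        pass

    # map()/filter() now return iterators; wrap in list() wherever a list is expected
    train_labels = list(map(lambda x: 2 * x - 1, [0, 1, 1, 0]))

    # renamed library entry points touched by this commit:
    #   sklearn.cross_validation.train_test_split  ->  sklearn.model_selection.train_test_split
    #   cv2.cv.CV_THRESH_BINARY_INV                 ->  cv2.THRESH_BINARY_INV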

AdaBoost/adaboost.py

Lines changed: 28 additions & 20 deletions

@@ -12,7 +12,7 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 sign_time_count = 0
@@ -48,7 +48,7 @@ def _train_less_than_(self):
 
         for i in self.indexes:
             score = 0
-            for j in xrange(self.N):
+            for j in range(self.N):
                 val = -1
                 if self.X[j]<i:
                     val = 1
@@ -74,7 +74,7 @@ def _train_more_than_(self):
 
         for i in self.indexes:
             score = 0
-            for j in xrange(self.N):
+            for j in range(self.N):
                 val = 1
                 if self.X[j]<i:
                     val = -1
@@ -150,7 +150,7 @@ def _Z_(self,index,classifier):
 
         Z = 0
 
-        for i in xrange(self.N):
+        for i in range(self.N):
             Z += self._w_(index,classifier,i)
 
         return Z
@@ -159,16 +159,16 @@ def train(self,features,labels):
 
         self._init_parameters_(features,labels)
 
-        for times in xrange(self.M):
+        for times in range(self.M):
             logging.debug('iterater %d' % times)
 
             time1 = time.time()
             map_time = 0
 
             best_classifier = (100000,None,None)  # (error rate, feature index, classifier)
-            for i in xrange(self.n):
+            for i in range(self.n):
                 map_time -= time.time()
-                features = map(lambda x:x[i],self.X)
+                features = list(map(lambda x:x[i],self.X))
                 map_time += time.time()
                 classifier = Sign(features,self.Y,self.w)
                 error_score = classifier.train()
@@ -179,10 +179,10 @@ def train(self,features,labels):
             em = best_classifier[0]
 
             # for profiling only, remove later -- start
-            print 'em is %s, index is %d' % (str(em),best_classifier[1])
+            print('em is %s, index is %d' % (str(em),best_classifier[1]))
             time2 = time.time()
             global sign_time_count
-            print 'total runtime: %s, time in the two hot code paths: %s, time spent in map: %s' % (str(time2-time1),str(sign_time_count),str(map_time))
+            print('total runtime: %s, time in the two hot code paths: %s, time spent in map: %s' % (str(time2-time1),str(sign_time_count),str(map_time)))
             sign_time_count = 0
             # for profiling only, remove later -- end
 
@@ -196,13 +196,13 @@ def train(self,features,labels):
             Z = self._Z_(best_classifier[1],best_classifier[2])
 
             # update the training-set weight distribution (formula 8.4)
-            for i in xrange(self.N):
+            for i in range(self.N):
                 self.w[i] = self._w_(best_classifier[1],best_classifier[2],i)/Z
 
     def _predict_(self,feature):
 
         result = 0.0
-        for i in xrange(self.M):
+        for i in range(self.M):
             index = self.classifier[i][0]
             classifier = self.classifier[i][1]
 
@@ -225,7 +225,7 @@ def predict(self,features):
 # binarization
 def binaryzation(img):
     cv_img = img.astype(np.uint8)
-    cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img)
+    cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img)
     return cv_img
 
 def binaryzation_features(trainset):
@@ -248,7 +248,7 @@ def binaryzation_features(trainset):
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -264,21 +264,29 @@ def binaryzation_features(trainset):
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.5, random_state=0)
 
    time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
-    train_labels = map(lambda x:2*x-1,train_labels)
+    print('Start training')
+    '''
+    In Python 3, map returns an iterator. If your function expects a list,
+    the iterator has to be explicitly converted, like this: data = list(map(...))
+    "'XXX' object is not subscriptable" means the object cannot be indexed;
+    indexable objects include list, tuple, and so on.
+    If you are sure the object is iterable, you can convert it to a list
+    with list() and then read its elements by index.
+    '''
+    train_labels = list(map(lambda x:2*x-1,train_labels))
    ada = AdaBoost()
    ada.train(train_features, train_labels)
 
    time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = ada.predict(test_features)
    time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
    test_labels = map(lambda x:2*x-1,test_labels)
    score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)

AdaBoost/adaboost_cpp.py

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 sign_time_count = 0

decision_tree/decision_tree.py

Lines changed: 5 additions & 5 deletions

@@ -7,7 +7,7 @@
 import pandas as pd
 
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -29,7 +29,7 @@ def wrapper(*args, **kwargs):
 # binarization
 def binaryzation(img):
     cv_img = img.astype(np.uint8)
-    cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img)
+    cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img)
     return cv_img
 
 @log
@@ -119,7 +119,7 @@ def recurse_train(train_set,train_label,features,epsilon):
         return Tree(LEAF,Class = label_set.pop())
 
     # step 2 -- if features is empty
-    (max_class,max_len) = max([(i,len(filter(lambda x:x==i,train_label))) for i in xrange(total_class)],key = lambda x:x[1])
+    (max_class,max_len) = max([(i,len(list(filter(lambda x:x==i,train_label)))) for i in range(total_class)],key = lambda x:x[1])
 
     if len(features) == 0:
         return Tree(LEAF,Class = max_class)
@@ -150,7 +150,7 @@ def recurse_train(train_set,train_label,features,epsilon):
     for feature_value in feature_value_list:
 
         index = []
-        for i in xrange(len(train_label)):
+        for i in range(len(train_label)):
             if train_set[i][max_feature] == feature_value:
                 index.append(i)
 
@@ -197,7 +197,7 @@ def predict(test_set,tree):
    test_predict = predict(test_features,tree)
    score = accuracy_score(test_labels,test_predict)
 
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)
 
 
 
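
As rendered on the page, the rewritten (max_class,max_len) line had its parentheses misplaced: list(filter(...)) must be closed before the for clause, otherwise the expression is a SyntaxError; the + line above includes that fix. The same majority-class computation can also be written more directly with collections.Counter; a hypothetical refactor, not part of the commit:

    from collections import Counter

    def majority_class(train_label):
        # Most frequent label and its count -- equivalent to the max()/filter()
        # expression in recurse_train for a non-empty label list (tie-breaking may differ).
        max_class, max_len = Counter(train_label).most_common(1)[0]
        return max_class, max_len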

knn/distance_test.py

Lines changed: 4 additions & 3 deletions

@@ -13,12 +13,13 @@
 
 time_1 = time.time()
 
-print np.sqrt(np.sum(np.square(vec_1 - vec_2)))
 
+print(np.sqrt(np.sum(np.square(vec_1 - vec_2))))
+### core linear algebra tools: norm, inv, solve, det, lstsq, pinv, matrix_power
 time_2 = time.time()
-print time_2-time_1
+print(time_2-time_1)
 
 print np.linalg.norm(vec_1 - vec_2)
 
 time_3 = time.time()
-print time_3-time_2
+print(time_3-time_2)
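
The unchanged context line print np.linalg.norm(vec_1 - vec_2) is still Python 2 syntax, so the script will not run under Python 3 until that line is converted as well. The two timed expressions compute the same Euclidean distance; a small self-contained comparison, assuming vec_1 and vec_2 are NumPy arrays as in the script:

    import numpy as np

    vec_1 = np.random.rand(784)
    vec_2 = np.random.rand(784)

    # Both compute the Euclidean (L2) distance between the two vectors.
    d1 = np.sqrt(np.sum(np.square(vec_1 - vec_2)))
    d2 = np.linalg.norm(vec_1 - vec_2)   # ord defaults to the 2-norm for vectors
    print(d1, d2)                        # agree up to floating-point rounding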

knn/knn.py

Lines changed: 10 additions & 10 deletions

@@ -6,7 +6,7 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -35,7 +35,7 @@ def Predict(testset,trainset,train_labels):
 
    for test_vec in testset:
        # print the index of the test sample currently being processed, for debugging
-        print count
+        print(count)
        count += 1
 
        knn_list = []   # the current k nearest neighbours
@@ -93,7 +93,7 @@ def Predict(testset,trainset,train_labels):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -111,17 +111,17 @@ def Predict(testset,trainset,train_labels):
    # print train_features.shape
 
    time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
-    print 'knn do not need to train'
+    print('Start training')
+    print('knn do not need to train')
    time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = Predict(test_features,train_features,train_labels)
    time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
    score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)
Binary file not shown.

logistic_regression/binary_perceptron.py

Lines changed: 11 additions & 11 deletions

@@ -12,7 +12,7 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 
@@ -23,7 +23,7 @@ def __init__(self):
        self.max_iteration = 5000
 
    def predict_(self, x):
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
        return int(wx > 0)
 
    def train(self, features, labels):
@@ -37,15 +37,15 @@ def train(self, features, labels):
            x = list(features[index])
            x.append(1.0)
            y = 2 * labels[index] - 1
-            wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+            wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
 
            if wx * y > 0:
                correct_count += 1
                if correct_count > self.max_iteration:
                    break
                continue
 
-            for i in xrange(len(self.w)):
+            for i in range(len(self.w)):
                self.w[i] += self.learning_step * (y * x[i])
 
    def predict(self,features):
@@ -59,7 +59,7 @@ def predict(self,features):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
    time_1 = time.time()
 
@@ -76,19 +76,19 @@ def predict(self,features):
    # print train_features.shape
 
    time_2 = time.time()
-    print 'read data cost ', time_2 - time_1, ' second', '\n'
+    print('read data cost ', time_2 - time_1, ' second', '\n')
 
-    print 'Start training'
+    print('Start training')
    p = Perceptron()
    p.train(train_features, train_labels)
 
    time_3 = time.time()
-    print 'training cost ', time_3 - time_2, ' second', '\n'
+    print('training cost ', time_3 - time_2, ' second', '\n')
 
-    print 'Start predicting'
+    print('Start predicting')
    test_predict = p.predict(test_features)
    time_4 = time.time()
-    print 'predicting cost ', time_4 - time_3, ' second', '\n'
+    print('predicting cost ', time_4 - time_3, ' second', '\n')
 
    score = accuracy_score(test_labels, test_predict)
-    print "The accruacy socre is ", score
+    print("The accruacy socre is ", score)

logistic_regression/competation.py

Lines changed: 5 additions & 5 deletions

@@ -11,7 +11,7 @@
 from binary_perceptron import Perceptron
 from logistic_regression import LogisticRegression
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 if __name__ == '__main__':
@@ -29,8 +29,8 @@
 
    writer = csv.writer(file('result.csv', 'wb'))
 
-    for time in xrange(test_time):
-        print 'iterater time %d' % time
+    for time in range(test_time):
+        print('iterater time %d' % time)
 
        train_features, test_features, train_labels, test_labels = train_test_split(
            imgs, labels, test_size=0.33, random_state=23323)
@@ -44,7 +44,7 @@
        p_score = accuracy_score(test_labels, p_predict)
        lr_score = accuracy_score(test_labels, lr_predict)
 
-        print 'perceptron accruacy score ', p_score
-        print 'logistic Regression accruacy score ', lr_score
+        print('perceptron accruacy score ', p_score)
+        print('logistic Regression accruacy score ', lr_score)
 
        writer.writerow([time,p_score,lr_score])
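
The context line writer = csv.writer(file('result.csv', 'wb')) is not touched by this commit, but it is Python 2 only: the file() builtin was removed in Python 3, and the csv module expects a text-mode handle. A sketch of the likely Python 3 equivalent (an assumption, not part of the commit; the column names are illustrative):

    import csv

    # open() replaces file(); text mode with newline='' lets the csv module
    # manage line endings itself.
    with open('result.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['iteration', 'perceptron_score', 'logistic_regression_score'])

In the script itself the with block would need to wrap the whole test loop so writer.writerow([time, p_score, lr_score]) stays inside it.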
