diff --git a/AdaBoost/adaboost.py b/AdaBoost/adaboost.py
index 1851f3b..645973f 100644
--- a/AdaBoost/adaboost.py
+++ b/AdaBoost/adaboost.py
@@ -12,7 +12,7 @@
 import numpy as np
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 sign_time_count = 0
@@ -48,7 +48,7 @@ def _train_less_than_(self):
         for i in self.indexes:
             score = 0
-            for j in xrange(self.N):
+            for j in range(self.N):
                 val = -1
                 if self.X[j] < i:
                     val = 1
diff --git a/logistic_regression/binary_perceptron.py b/logistic_regression/binary_perceptron.py
--- a/logistic_regression/binary_perceptron.py
+++ b/logistic_regression/binary_perceptron.py
@@ -23,7 +23,7 @@ def __init__(self):
         self.max_iteration = 5000
 
     def predict_(self, x):
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
         return int(wx > 0)
 
     def train(self, features, labels):
@@ -37,7 +37,7 @@ def train(self, features, labels):
             x = list(features[index])
             x.append(1.0)
             y = 2 * labels[index] - 1
-            wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+            wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
 
             if wx * y > 0:
                 correct_count += 1
@@ -45,7 +45,7 @@ def train(self, features, labels):
                     break
                 continue
 
-            for i in xrange(len(self.w)):
+            for i in range(len(self.w)):
                 self.w[i] += self.learning_step * (y * x[i])
 
     def predict(self,features):
@@ -59,7 +59,7 @@ def predict(self,features):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -76,19 +76,19 @@ def predict(self,features):
     # print train_features.shape
     time_2 = time.time()
-    print 'read data cost ', time_2 - time_1, ' second', '\n'
+    print('read data cost ', time_2 - time_1, ' second', '\n')
 
-    print 'Start training'
+    print('Start training')
     p = Perceptron()
     p.train(train_features, train_labels)
 
     time_3 = time.time()
-    print 'training cost ', time_3 - time_2, ' second', '\n'
+    print('training cost ', time_3 - time_2, ' second', '\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = p.predict(test_features)
     time_4 = time.time()
-    print 'predicting cost ', time_4 - time_3, ' second', '\n'
+    print('predicting cost ', time_4 - time_3, ' second', '\n')
 
     score = accuracy_score(test_labels, test_predict)
-    print "The accruacy socre is ", score
+    print("The accuracy score is ", score)
diff --git a/logistic_regression/competation.py b/logistic_regression/competation.py
index cba8f01..7386ef1 100644
--- a/logistic_regression/competation.py
+++ b/logistic_regression/competation.py
@@ -11,7 +11,7 @@
 from binary_perceptron import Perceptron
 from logistic_regression import LogisticRegression
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 if __name__ == '__main__':
@@ -29,8 +29,8 @@
     writer = csv.writer(file('result.csv', 'wb'))
 
-    for time in xrange(test_time):
-        print 'iterater time %d' % time
+    for time in range(test_time):
+        print('iteration %d' % time)
         train_features, test_features, train_labels, test_labels = train_test_split(
             imgs, labels, test_size=0.33, random_state=23323)
@@ -44,7 +44,7 @@
         p_score = accuracy_score(test_labels, p_predict)
         lr_score = accuracy_score(test_labels, lr_predict)
 
-        print 'perceptron accruacy score ', p_score
-        print 'logistic Regression accruacy score ', lr_score
+        print('perceptron accuracy score ', p_score)
+        print('logistic regression accuracy score ', lr_score)
 
         writer.writerow([time,p_score,lr_score])
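Note on the competation.py hunk above: the untouched context line `writer = csv.writer(file('result.csv', 'wb'))` still uses the Python 2 `file` builtin and binary mode, so the ported script raises NameError under Python 3. A minimal follow-up sketch, assuming the same result.csv output:

```python
import csv

# Python 3: the `file` builtin is gone, and the csv module wants a
# text-mode handle opened with newline='' instead of the old 'wb'.
with open('result.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow([0, 0.97, 0.98])  # illustrative row: [time, p_score, lr_score]
```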
diff --git a/logistic_regression/logistic_regression.py b/logistic_regression/logistic_regression.py
index 84e8233..5da8901 100644
--- a/logistic_regression/logistic_regression.py
+++ b/logistic_regression/logistic_regression.py
@@ -10,7 +10,7 @@
 import random
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
@@ -21,7 +21,7 @@ def __init__(self):
         self.max_iteration = 5000
 
     def predict_(self,x):
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
         exp_wx = math.exp(wx)
 
         predict1 = exp_wx / (1 + exp_wx)
@@ -55,10 +55,10 @@ def train(self,features, labels):
             time += 1
             correct_count = 0
 
-            wx = sum([self.w[i] * x[i] for i in xrange(len(self.w))])
+            wx = sum([self.w[i] * x[i] for i in range(len(self.w))])
             exp_wx = math.exp(wx)
 
-            for i in xrange(len(self.w)):
+            for i in range(len(self.w)):
                 self.w[i] -= self.learning_step * \
                     (-y * x[i] + float(x[i] * exp_wx) / float(1 + exp_wx))
@@ -74,7 +74,7 @@ def predict(self,features):
         return labels
 
 if __name__ == "__main__":
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -89,19 +89,19 @@ def predict(self,features):
     train_features, test_features, train_labels, test_labels = train_test_split(imgs, labels, test_size=0.33, random_state=23323)
     time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
+    print('Start training')
     lr = LogisticRegression()
     lr.train(train_features, train_labels)
 
     time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = lr.predict(test_features)
     time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
     score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
+    print("The accuracy score is ", score)
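The logistic regression update above still evaluates `math.exp(wx)` on a raw dot product, which raises OverflowError once wx exceeds roughly 709. A numerically stable sigmoid sketch (not part of this patch; the function name is illustrative):

```python
import math

def stable_sigmoid(wx):
    # For wx >= 0 rewrite exp(wx) / (1 + exp(wx)) as 1 / (1 + exp(-wx)),
    # so exp() is only ever evaluated on a non-positive argument.
    if wx >= 0:
        return 1.0 / (1.0 + math.exp(-wx))
    e = math.exp(wx)
    return e / (1.0 + e)
```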
diff --git a/maxENT/maxENT.py b/maxENT/maxENT.py
index 3acad95..e5ba71e 100644
--- a/maxENT/maxENT.py
+++ b/maxENT/maxENT.py
@@ -15,7 +15,7 @@
 from collections import defaultdict
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
@@ -47,7 +47,7 @@ def cal_Pxy_Px(self, X, Y):
         self.Pxy = defaultdict(int)
         self.Px = defaultdict(int)
 
-        for i in xrange(len(X)):
+        for i in range(len(X)):
             x_, y = X[i], Y[i]
             self.Y_.add(y)
@@ -60,7 +60,7 @@ def cal_EPxy(self):
         计算书中82页最下面那个期望
         '''
         self.EPxy = defaultdict(float)
-        for id in xrange(self.n):
+        for id in range(self.n):
             (x, y) = self.id2xy[id]
             self.EPxy[id] = float(self.Pxy[(x, y)]) / float(self.N)
@@ -84,7 +84,7 @@ def cal_EPx(self):
         '''
         计算书83页最上面那个期望
         '''
-        self.EPx = [0.0 for i in xrange(self.n)]
+        self.EPx = [0.0 for i in range(self.n)]
 
         for i, X in enumerate(self.X_):
             Pyxs = self.cal_probality(X)
@@ -104,19 +104,19 @@ def train(self, X, Y):
         self.w = [0.0 for i in range(self.n)]
         max_iteration = 1000
 
-        for times in xrange(max_iteration):
-            print 'iterater times %d' % times
+        for times in range(max_iteration):
+            print('iteration %d' % times)
             sigmas = []
             self.cal_EPx()
 
-            for i in xrange(self.n):
+            for i in range(self.n):
                 sigma = 1 / self.M * math.log(self.EPxy[i] / self.EPx[i])
                 sigmas.append(sigma)
 
             # if len(filter(lambda x: abs(x) >= 0.01, sigmas)) == 0:
             #     break
 
-            self.w = [self.w[i] + sigmas[i] for i in xrange(self.n)]
+            self.w = [self.w[i] + sigmas[i] for i in range(self.n)]
 
     def predict(self, testset):
         results = []
@@ -142,7 +142,7 @@ def rebuild_features(features):
 
 if __name__ == "__main__":
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -160,19 +160,19 @@ def rebuild_features(features):
     test_features = rebuild_features(test_features)
     time_2 = time.time()
-    print 'read data cost ', time_2 - time_1, ' second', '\n'
+    print('read data cost ', time_2 - time_1, ' second', '\n')
 
-    print 'Start training'
+    print('Start training')
     met = MaxEnt()
     met.train(train_features, train_labels)
 
     time_3 = time.time()
-    print 'training cost ', time_3 - time_2, ' second', '\n'
+    print('training cost ', time_3 - time_2, ' second', '\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = met.predict(test_features)
     time_4 = time.time()
-    print 'predicting cost ', time_4 - time_3, ' second', '\n'
+    print('predicting cost ', time_4 - time_3, ' second', '\n')
 
     score = accuracy_score(test_labels, test_predict)
-    print "The accruacy socre is ", score
+    print("The accuracy score is ", score)
diff --git a/naive_bayes/naive_bayes.py b/naive_bayes/naive_bayes.py
index 07c2091..099e54d 100644
--- a/naive_bayes/naive_bayes.py
+++ b/naive_bayes/naive_bayes.py
@@ -6,13 +6,13 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 # 二值化
 def binaryzation(img):
     cv_img = img.astype(np.uint8)
-    cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img)
+    cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img)
     return cv_img
 
 def Train(trainset,train_labels):
@@ -83,7 +83,7 @@ def Predict(testset,prior_probability,conditional_probability):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -99,17 +99,17 @@ def Predict(testset,prior_probability,conditional_probability):
     # print train_features.shape
     time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
+    print('Start training')
     prior_probability,conditional_probability = Train(train_features,train_labels)
 
     time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = Predict(test_features,prior_probability,conditional_probability)
 
     time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
     score = accuracy_score(test_labels,test_predict)
-    print "The accruacy socre is ", score
\ No newline at end of file
+    print("The accuracy score is ", score)
\ No newline at end of file
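The binaryzation hunk above only swaps the constant name, but in modern OpenCV `cv2.threshold` also returns a `(retval, dst)` tuple, so the in-place dst argument can be dropped. A hedged sketch of the more idiomatic form:

```python
import cv2
import numpy as np

def binaryzation(img):
    cv_img = img.astype(np.uint8)
    # cv2.threshold returns (threshold_value, output_image); taking the
    # returned array avoids mutating the input buffer in place.
    _, cv_img = cv2.threshold(cv_img, 50, 1, cv2.THRESH_BINARY_INV)
    return cv_img
```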
diff --git a/notebooks/1-perceptron.ipynb b/notebooks/1-perceptron.ipynb
new file mode 100644
index 0000000..ec5083d
--- /dev/null
+++ b/notebooks/1-perceptron.ipynb
@@ -0,0 +1,101 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This uses the sigmoid function and handles a binary classification problem"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import cv2\n",
+    "import random\n",
+    "import time\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "class Perceptron(object):\n",
+    "\n",
+    "    def __init__(self):\n",
+    "        self.learning_step = 0.00001\n",
+    "        self.max_iteration = 5000\n",
+    "    \n",
+    "    def train(self, features, labels):\n",
+    "        self.w = [0.0] * (len(features[0]) + 1)\n",
+    "        correct_count = 0\n",
+    "        time = 0\n",
+    "        \n",
+    "        while time < self.max_iteration:\n",
+    "            index = random.randint(0, len(labels) - 1)\n",
+    "            x = list(features[index])\n",
+    "            x.append(1.0)\n",
+    "            y = 2 * labels[index] - 1\n",
+    "            wx = sum([self.w[j] * x[j] for j in range(len(self.w))])\n",
+    "            \n",
+    "            if wx * y > 0:\n",
+    "                correct_count += 1\n",
+    "                if correct_count > self.max_iteration:\n",
+    "                    break\n",
+    "                continue\n",
+    "\n",
+    "            for i in range(len(self.w)):\n",
+    "                self.w[i] += self.learning_step * (y * x[i])\n",
+    "\n",
+    "    def predict_(self, x):\n",
+    "        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])\n",
+    "        return int(wx > 0)\n",
+    "\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [py35]",
+   "language": "python",
+   "name": "Python [py35]"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
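The notebook's train loop computes `wx` with a per-sample Python list comprehension. A sketch of the same prediction step vectorized with NumPy, assuming the bias-as-last-weight convention from `x.append(1.0)` (the function name is illustrative, not from the notebook):

```python
import numpy as np

def predict_batch(w, features):
    # append the constant bias feature, then threshold the margins of
    # every sample in one matrix-vector product
    X = np.hstack([np.asarray(features, dtype=float), np.ones((len(features), 1))])
    return (X @ np.asarray(w) > 0).astype(int)
```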
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labelpixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8...pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
01000000000...0000000000
10000000000...0000000000
21000000000...0000000000
34000000000...0000000000
40000000000...0000000000
\n", + "

5 rows × 785 columns

\n", + "
" + ], + "text/plain": [ + " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n", + "0 1 0 0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 0 0 0 \n", + "2 1 0 0 0 0 0 0 0 0 \n", + "3 4 0 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 0 0 \n", + "\n", + " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n", + "0 0 ... 0 0 0 0 0 \n", + "1 0 ... 0 0 0 0 0 \n", + "2 0 ... 0 0 0 0 0 \n", + "3 0 ... 0 0 0 0 0 \n", + "4 0 ... 0 0 0 0 0 \n", + "\n", + " pixel779 pixel780 pixel781 pixel782 pixel783 \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 0 0 0 0 0 \n", + "4 0 0 0 0 0 \n", + "\n", + "[5 rows x 785 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(42000, 785)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "raw_data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "两个冒号的语法:\n", + " seq[start:end:step]\n", + "原来是\n", + " imgs = data[0::,1::]\n", + " labels = data[::,0]\n", + "没必要这样写" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "data = raw_data.values\n", + "imgs = data[:, 1:]\n", + "labels = data[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(42000, 784)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "imgs.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.unique(labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# 利用opencv获取图像hog特征\n", + "def get_hog_features(trainset):\n", + " features = []\n", + "\n", + " hog = cv2.HOGDescriptor('../hog.xml')\n", + "\n", + " for img in trainset:\n", + " img = np.reshape(img,(28,28))\n", + " cv_img = img.astype(np.uint8)\n", + "\n", + " hog_feature = hog.compute(cv_img)\n", + " # hog_feature = np.transpose(hog_feature)\n", + " features.append(hog_feature)\n", + "\n", + " features = np.array(features)\n", + " features = np.reshape(features,(-1,324))\n", + "\n", + " return features" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "features = get_hog_features(imgs)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(42000, 324)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "features.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(42000,)" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels.shape" + ] + }, + { + "cell_type": "code", + 
"execution_count": 22, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size=0.33, random_state=23323)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 预测\n", + "\n", + "因为knn不需要训练,我们可以直接进行预测。不过因为4万个数据即使是预测也非常花时间,这里只取前100个样本做训练集,去30个样本做测试集:" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "testset, trainset, train_labels = test_features[:30], train_features[:100], train_labels[:100]" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "k = 10 # 最近的10个点\n", + "\n", + "predict = []\n", + "count = 0" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "5.0" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 计算两个点的欧氏距离\n", + "np.linalg.norm(np.array([0, 3]) - np.array([4, 0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "time_1 = time.time()\n", + "\n", + "for test_vec in testset:\n", + " # 输出当前运行的测试用例坐标,用于测试\n", + " count += 1\n", + " if count % 5000 == 0:\n", + " print(count)\n", + " \n", + " knn_list = np.zeros((1, 2)) # 初始化,存放当前k个最近邻居\n", + " \n", + " # 先将前k个点放入k个最近邻居中,填充满knn_list\n", + " for i in range(k):\n", + " label = train_labels[i]\n", + " train_vec = trainset[i]\n", + "\n", + " dist = np.linalg.norm(train_vec - test_vec) # 计算两个点的欧氏距离\n", + " knn_list = np.append(knn_list, [[dist, label]], axis=0)\n", + " \n", + " # 剩下的点\n", + " for i in range(k, len(train_labels)):\n", + " label = train_labels[i]\n", + " train_vec = trainset[i]\n", + "\n", + " dist = np.linalg.norm(train_vec - test_vec) # 计算两个点的欧氏距离\n", + "\n", + " # 寻找10个邻近点中距离最远的点\n", + " max_index = np.argmax(knn_list[:, 0])\n", + " max_dist = np.max(knn_list[:, 0])\n", + "\n", + " # 如果当前k个最近邻居中存在点距离比当前点距离远,则替换\n", + " if dist < max_dist:\n", + " knn_list[max_index] = [dist, label]\n", + " \n", + " \n", + " # 上面代码计算全部运算完之后,即说明已经找到了离当前test_vec最近的10个train_vec\n", + " # 统计选票\n", + " class_total = 10\n", + " class_count = [0 for i in range(class_total)]\n", + " for dist, label in knn_list:\n", + " class_count[int(label)] += 1\n", + "\n", + " # 找出最大选票数\n", + " label_max = max(class_count)\n", + "\n", + " # 最大选票数对应的class\n", + " predict.append(class_count.index(label_max))\n", + "\n", + "time_2 = time.time()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train time is 0.07612895965576172\n" + ] + } + ], + "source": [ + "print('train time is %s' % (time_2 - time_1))" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train time is 3\n" + ] + } + ], + "source": [ + "print('train time is %s' % (5-2))" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0. , 0. ],\n", + " [ 1.10036302, 3. ],\n", + " [ 1.09803486, 3. ],\n", + " [ 1.09235775, 3. ],\n", + " [ 1.03992426, 3. 
],\n", + " [ 1.04467952, 3. ],\n", + " [ 1.06501627, 3. ],\n", + " [ 0.93764162, 3. ],\n", + " [ 1.05351973, 3. ],\n", + " [ 1.04691565, 3. ],\n", + " [ 0.9816038 , 3. ]])" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([], dtype=float64)" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list = np.array([]) # 当前k个最近邻居\n", + " \n", + "# 先将前k个点放入k个最近邻居中,填充满knn_list\n", + "for i in range(k):\n", + " label = train_labels[i]\n", + " train_vec = trainset[i]\n", + "\n", + " dist = np.linalg.norm(train_vec - test_vec) # 计算两个点的欧氏距离\n", + " knn_list_test = np.append(knn_list_test, [[8.5, 9]], axis=0)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 测试用\n", + "\n", + "下面自己写一个寻找10个领近点中距离最远的点:" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0., 0.]])" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list = np.zeros((1, 2)) # 当前k个最近邻居\n", + "knn_list" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "all the input array dimensions except for the concatenation axis must match exactly", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mknn_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m8.5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m9\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/Users/xu/anaconda/envs/py35/lib/python3.5/site-packages/numpy/lib/function_base.py\u001b[0m in \u001b[0;36mappend\u001b[0;34m(arr, values, axis)\u001b[0m\n\u001b[1;32m 5145\u001b[0m \u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mravel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5146\u001b[0m \u001b[0maxis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5147\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: all the input array dimensions except for the concatenation axis must match exactly" + ] + } + ], + "source": [ + "np.append(knn_list, [[8.5, 9]], axis=0)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.3, 1. ],\n", + " [ 3.5, 1. ],\n", + " [ 1.5, 4. ],\n", + " [ 6.5, 2. ],\n", + " [ 5.5, 8. ]])" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list_test = np.array([[2.3, 1], [3.5, 1], [1.5, 4], [6.5, 2], [5.5, 8]])\n", + "# 每个元组里,第一个是距离,第二个是对应标签\n", + "knn_list_test" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 2.3, 3.5, 1.5, 6.5, 5.5])" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list_test[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "knn_list_test[2] = [9.5, 5]" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.3, 1. ],\n", + " [ 3.5, 1. ],\n", + " [ 9.5, 5. ],\n", + " [ 6.5, 2. ],\n", + " [ 5.5, 8. ]])" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "knn_list_test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "要想给一个ndarray添加一个元素,必须是同样的格式,即必须是`[[8.5, 9]]`,不能使`[8.5, 9]`,而且必须要用axis指定才行。" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.3, 1. ],\n", + " [ 3.5, 1. ],\n", + " [ 9.5, 5. ],\n", + " [ 6.5, 2. ],\n", + " [ 5.5, 8. ],\n", + " [ 8.5, 9. ],\n", + " [ 8.5, 9. ]])" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.append(knn_list_test, [[8.5, 9]], axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 2.3, 1. ],\n", + " [ 3.5, 1. ],\n", + " [ 9.5, 5. ],\n", + " [ 6.5, 2. ],\n", + " [ 5.5, 8. ],\n", + " [ 8.5, 9. 
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "knn_list_test[:, 0].argmax()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([], dtype=float64)"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.array([])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Output the score\n",
+    "\n",
+    "After the tally finishes, we have predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 125,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "30"
+      ]
+     },
+     "execution_count": 125,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(predict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "test_predict = np.array(predict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "score = accuracy_score(test_labels[:30], test_predict)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6333333333333333"
+      ]
+     },
+     "execution_count": 129,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [py35]",
+   "language": "python",
+   "name": "Python [py35]"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
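The k-NN loop above computes one Euclidean distance at a time and keeps the k best by rescanning with argmax. A hedged NumPy sketch of the same vote done with broadcasting, assuming the trainset/train_labels/testset arrays defined in the notebook and integer labels 0 to 9:

```python
import numpy as np

def knn_predict(trainset, train_labels, testset, k=10):
    predictions = []
    for test_vec in testset:
        # distances to every training vector in one vectorized call
        dists = np.linalg.norm(trainset - test_vec, axis=1)
        # indices of the k smallest distances (unordered, which a vote ignores)
        nearest = np.argpartition(dists, k)[:k]
        votes = np.bincount(train_labels[nearest].astype(int), minlength=10)
        predictions.append(int(votes.argmax()))
    return np.array(predictions)
```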
diff --git a/notebooks/3-naive_bayes.ipynb b/notebooks/3-naive_bayes.ipynb
new file mode 100644
index 0000000..d821bea
--- /dev/null
+++ b/notebooks/3-naive_bayes.ipynb
@@ -0,0 +1,1027 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import cv2\n",
+    "import random\n",
+    "import time\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.metrics import accuracy_score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(28140, 784)\n",
+      "(13860, 784)\n"
+     ]
+    }
+   ],
+   "source": [
+    "raw_data = pd.read_csv('../data/train.csv',header=0)\n",
+    "data = raw_data.values\n",
+    "imgs = data[:, 1:]\n",
+    "labels = data[:, 0]\n",
+    "# use 2/3 of the data for training and 1/3 for testing\n",
+    "train_features, test_features, train_labels, test_labels = train_test_split(imgs, labels, test_size=0.33, random_state=23323)\n",
+    "\n",
+    "print(train_features.shape)\n",
+    "print(test_features.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# binarization\n",
+    "def binaryzation(img):\n",
+    "    cv_img = img.astype(np.uint8)\n",
+    "    cv2.threshold(cv_img, 50, 1, cv2.THRESH_BINARY_INV, cv_img)\n",
+    "    return cv_img"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "    cv2.threshold(cv_img, 50, 1, cv2.THRESH_BINARY_INV, cv_img)\n",
+    "In this call, cv_img is the input of 784 pixel values (0~255), 50 is the threshold, 1 is the maximum value, and cv2.THRESH_BINARY_INV is the thresholding type. With the inverse binary type, pixels whose value exceeds 50 become 0 and all other pixels become 1.\n",
+    "\n",
+    "Let's look at the effect of binarization:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  63,\n",
+       "       255, 253, 253, 244, 120,  22,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  12,\n",
+       "       100, 209, 253, 252, 252, 252, 252, 187,   6,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "       144, 217, 252, 161, 253, 183, 153, 106, 218, 252,  70,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "        87, 180, 242, 243, 202,  68,  10,   3,   0,   0,  60, 194,  31,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   5, 184, 252, 226,  93,  23,   0,   0,   0,   0,   0,  32,\n",
+       "       142, 179,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0, 195, 252, 183,  29,   0,   0,   0,   0,   0,   0,\n",
+       "         0, 141, 252,  45,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,  48, 247, 173,  38,   0,   0,   0,   0,   0,\n",
+       "         0,   0,  26, 245, 252,  74,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0, 100, 229,  72,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0, 132, 252, 252, 131,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,  26, 153,  27,   0,   0,\n",
+       "         0,   0,   0,   0,   0,  34, 132, 252, 252,  98,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 242, 159,\n",
+       "       111,  58,  68,  77,   0,  15,  34,  14, 180, 252, 252,  21,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "       181, 253, 253, 253, 253, 114,   0,   0,   0, 100, 253, 253, 141,\n",
+       "        10,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,  50, 229, 252, 249, 120,  20,   0,   0,   0, 176, 252,\n",
+       "       252,  55,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,  29,  44,  42,   0,   0,   0,   0,   0,\n",
+       "       209, 252, 206,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         3, 128, 251, 252,  92,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,  58, 252, 252, 238,  31,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,  39, 230, 252, 252, 143,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0, 116, 253, 252, 252,  20,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,  14, 226, 253, 252, 172,   4,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0, 159, 252, 253, 232,  30,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,  20, 216, 252, 253,\n",
+       "       186,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,\n",
+       "         0,   0,   0,   0])"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "trainset[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,\n",
+       "       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,\n",
+       "       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,\n",
+       "       0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,\n",
+       "       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,\n",
+       "       0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1], dtype=uint8)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "binaryzation(trainset[0]) # binarize the image"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "trainset, train_labels = train_features, train_labels"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "class_num = 10\n",
+    "feature_len = 784"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(10,)\n",
+      "(10, 784, 2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# holds the prior probabilities\n",
+    "prior_probability = np.zeros(class_num) \n",
+    "print(prior_probability.shape)\n",
+    "# holds the conditional probabilities\n",
+    "conditional_probability = np.zeros((class_num, feature_len, 2)) \n",
+    "print(conditional_probability.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "See this article for details: [机器学习通俗入门-朴素贝叶斯分类器](http://blog.csdn.net/TaiJi1985/article/details/73657994)\n",
+    "\n",
+    "$x^{(i)}$ is the vector of the i-th sample (28\\*28 = 784 dimensions) and $y^{(i)}$ its labelled class. The target we are solving for is\n",
+    "\n",
+    "$$f = \\underset{j}{\\arg\\max}\\, P(y^{(i)} = j \\mid x^{(i)})$$\n",
+    "\n",
+    "Simply put: compute $P(y^{(i)} = 0 \\mid x^{(i)})$, $P(y^{(i)} = 1 \\mid x^{(i)})$ ... $P(y^{(i)} = 9 \\mid x^{(i)})$ and find the largest; if the probability of belonging to class j is largest, the image is assigned to class j.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# compute the prior and conditional probabilities\n",
+    "for i in range(len(train_labels)):\n",
+    "    img = binaryzation(trainset[i]) # binarize the image\n",
+    "    label = train_labels[i]\n",
+    "\n",
+    "    prior_probability[label] += 1 # how many images each label has\n",
+    "\n",
+    "    for j in range(feature_len):\n",
+    "        conditional_probability[label][j][img[j]] += 1 \n",
+    "        # img[j] is the value at pixel j: a 0 increments the first slot, a 1 the second.\n",
+    "        # e.g. conditional_probability[0][0] below is [0, 2711]: among the images\n",
+    "        # labelled 0, how many have 0 and how many have 1 at that pixel position"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 2711.,  3197.,  2828.,  2897.,  2751.,  2565.,  2769.,  2964.,\n",
+       "        2654.,  2804.])"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "prior_probability"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's unpack the loop above, taking the first training sample:\n"
+   ]
+  },
"code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 199. 2512.]\n" + ] + } + ], + "source": [ + "print(conditional_probability[0][500]) # " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "下面之所以将概率归到[1.10001],是因为上面所有关于概率的部分都是直接用样本数量,而不是实际的概率来记录的。这么做应该是为了在工程上解决内存,但是这种工程上的优化,对于理解书中的公式造成了影响。" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "而且下面计算概率的时候有点问题:\n", + " probalility_0 = (float(pix_0)/float(pix_0+pix_1))*1000000 + 1\n", + "分母部分是,属于i类(0~9)的图像中,像素j的数量……对啊,这个像素j的数量其实就是pix_0和pix_1的和,即属于i类的图像的数量。看来这里没问题,是我想多了。" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# 将概率归到[1.10001]\n", + "for i in range(class_num):\n", + " for j in range(feature_len):\n", + "\n", + " # 经过二值化后图像只有0,1两种取值\n", + " pix_0 = conditional_probability[i][j][0] # 属于i类(0~9)的图像中,像素j(0~783)为0的数量\n", + " pix_1 = conditional_probability[i][j][1] # 属于i类(0~9)的图像中,像素j(0~783)为1的数量\n", + "\n", + " # 计算0,1像素点对应的条件概率\n", + " probalility_0 = (float(pix_0)/float(pix_0+pix_1))*1000000 + 1\n", + " probalility_1 = (float(pix_1)/float(pix_0+pix_1))*1000000 + 1\n", + "\n", + " conditional_probability[i][j][0] = probalility_0\n", + " conditional_probability[i][j][1] = probalility_1" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.00000000e+00, 1.00000100e+06])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conditional_probability[0][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "得到了prior_probability和conditional_probability,这就算是训练结束了。\n", + "\n", + "# test (predict)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 784)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 为了加快预测速度,这里直接取100个测试样本\n", + "\n", + "testset = test_features[:100]\n", + "testset.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "\n", + "$$p (y^{(i)} = j \\mid x_{k}^{(i)}) = \\frac{p (x_{k}^{(i)} \\mid y^{(i)} = j) \\cdot p(y^{(i)} = j)}{p(x_{k}^{(i)})}$$\n", + "\n", + "$p (y^{(i)} = j \\mid x_{k}^{(i)}) $中,$y^{(i)} = j$表示从属于哪一类,$x_{k}^{(i)}$表示哪一个像素点。\n", + "\n", + "下面calculate_probability函数就是在计算分子部分。\n", + "\n", + "`probability *= int(conditional_probability[label][i][img[i]])`\n", + "\n", + "这行代码中:\n", + "- probability表示先验概率 $p(y^{(i)} = j)$\n", + "- `conditional_probability[label][i][img[i]]`表示 $p (x_{k}^{(i)} \\mid y^{(i)} = j) $\n" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# 计算不同标签下,testdata的概率\n", + "def calculate_probability(img, label):\n", + " probability = int(prior_probability[label])\n", + "\n", + " for i in range(len(img)):\n", + " probability *= int(conditional_probability[label][i][img[i]])\n", + "\n", + " return probability" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "predict = []\n", + "\n", + "for img in testset:\n", + "\n", + " # 图像二值化\n", + " img = binaryzation(img)\n", + "\n", + " max_label = 0\n", + 
" max_probability = calculate_probability(img, 0)\n", + "\n", + " for j in range(1, 10):\n", + " probability = calculate_probability(img, j)\n", + "\n", + " if max_probability < probability:\n", + " max_label = j\n", + " max_probability = probability\n", + "\n", + " predict.append(max_label)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "test_predict = np.array(predict)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.76000000000000001" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "score = accuracy_score(test_labels[:100], test_predict)\n", + "score" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 重构朴素贝叶斯算法\n", + "\n", + "![](https://pic1.zhimg.com/v2-e17426fd0627560f1fc82118dd1d5d14_r.jpg)\n", + "\n", + "朴素贝叶斯认为所有特征都是独立的,然后得出一个样本出现的概率使其所有特征出现概率的联乘。\n", + "\n", + "首先求每一个标签的先验概率:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(10,)\n", + "(10, 784, 2)\n" + ] + } + ], + "source": [ + "class_num = 10\n", + "feature_len = 784\n", + "\n", + "# 存放每个label的数量\n", + "class_number = np.zeros(class_num) \n", + "\n", + "# 存放先验概率\n", + "prior_probability = np.zeros(class_num) \n", + "print(prior_probability.shape)\n", + "# 存放条件概率\n", + "conditional_probability = np.zeros((class_num, feature_len, 2)) \n", + "print(conditional_probability.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 计算先验概率\n", + "for i in range(len(train_labels)):\n", + " img = binaryzation(trainset[i]) # 图片二值化\n", + " label = train_labels[i]\n", + "\n", + " class_number[label] += 1 # 每个label的图片各有多少个\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0.09633973, 0.11361052, 0.10049751, 0.10294954, 0.09776119,\n", + " 0.09115139, 0.09840085, 0.10533049, 0.09431414, 0.09964463])" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_number/len(train_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 2711., 3197., 2828., 2897., 2751., 2565., 2769., 2964.,\n", + " 2654., 2804.])" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_number" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "prior_probability = class_number / len(train_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "计算条件概率: \n", + "\n", + "$$p (X^{(i)} = a_{jl} \\mid Y = c_k)$$\n", + "\n", + "在标签为$c_k$的前提下,样本x的第$j$个特征(像素点)的第$l$个值(经过二值化处理,这里的$l$只有0或1两种可能)。conditional_probability的维度是`(10, 784, 2)`,最后的那个2,指的就是每个特征可以取的值。如果不做二值化处理,那么每个像素点应该有256种取值。" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# 条件概率\n", + "conditional_probability = np.zeros((class_num, feature_len, 2)) \n", + "\n", + "for i 
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": [
+    "# conditional probabilities\n",
+    "conditional_probability = np.zeros((class_num, feature_len, 2)) \n",
+    "\n",
+    "for i in range(len(train_labels)):\n",
+    "    img = binaryzation(trainset[i]) # binarize the image\n",
+    "    label = train_labels[i]\n",
+    "    for j in range(feature_len):\n",
+    "        conditional_probability[label][j][img[j]] += 1 # this only accumulates the counts of a_jl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([  199.,  2512.])"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conditional_probability[0][500] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "2711.0"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "class_number[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 0.07340465,  0.92659535])"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conditional_probability[0][500] / class_number[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "conditional_probability_fraction = np.zeros((class_num, feature_len, 2)) \n",
+    "\n",
+    "for i in range(len(train_labels)):\n",
+    "    label = train_labels[i]\n",
+    "    for j in range(feature_len):\n",
+    "        conditional_probability_fraction[label][j] = conditional_probability[label][j] / class_number[label]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 0.07340465,  0.92659535])"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "conditional_probability_fraction[0][500]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Written as two separate loops, the above gets long-winded; better to put them together:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# compute the prior and conditional probabilities\n",
+    "for i in range(len(train_labels)):\n",
+    "    img = binaryzation(trainset[i]) # binarize the image\n",
+    "    label = train_labels[i]\n",
+    "\n",
+    "    class_number[label] += 1 # how many images each label has\n",
+    "    prior_probability = class_number / len(train_labels)\n",
+    "\n",
+    "    for j in range(feature_len):\n",
+    "        conditional_probability[label][j][img[j]] += 1 \n",
+    "        # among all training samples of a given label, how many have 0 and how many have 1\n",
+    "        # at each pixel position\n",
+    "    \n",
+    "# normalize the counts into probabilities \n",
+    "for i in range(class_num):\n",
+    "    for j in range(feature_len):\n",
+    "        conditional_probability[i][j] = conditional_probability[i][j] / class_number[i]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "That completes step one, computing the prior and conditional probabilities. Next, step two: predict on the test set:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(100, 784)"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "testset.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# a function computing, under each label, the corresponding probability\n",
+    "def calculate_probability(img, label):\n",
+    "    probability = prior_probability[label] # the prior probability\n",
+    "\n",
+    "    # iterate over the pixels, take each pixel's probability with the label fixed,\n",
+    "    # and multiply them all together\n",
+    "    for i in range(len(img)):\n",
+    "        probability *= conditional_probability[label][i][img[i]] \n",
+    "        # [i] is the i-th pixel of the test sample\n",
+    "        # img[i] is whether that pixel is 0 or 1\n",
+    "\n",
+    "    return probability"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/xu/anaconda/envs/py35/lib/python3.5/site-packages/ipykernel/__main__.py:7: RuntimeWarning: overflow encountered in double_scalars\n",
+      "/Users/xu/anaconda/envs/py35/lib/python3.5/site-packages/ipykernel/__main__.py:7: RuntimeWarning: invalid value encountered in double_scalars\n"
+     ]
+    }
+   ],
+   "source": [
+    "predict = []\n",
+    "\n",
+    "for img in testset:\n",
+    "    img = binaryzation(img)\n",
+    "    \n",
+    "    max_label = 0\n",
+    "    max_probability = calculate_probability(img, 0)\n",
+    "    \n",
+    "    for j in range(1, 10):\n",
+    "        probability = calculate_probability(img, j)\n",
+    "    \n",
+    "        if max_probability < probability:\n",
+    "            max_label = j\n",
+    "            max_probability = probability\n",
+    "\n",
+    "    predict.append(max_label)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "So it really is the case: the source kept raw counts all along precisely to guard against overflow.\n",
+    "\n",
+    "No changes needed from me, then; the original code already accounts for the overflow problem."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [py35]",
+   "language": "python",
+   "name": "Python [py35]"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
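The RuntimeWarning in the notebook above comes from multiplying 784 factors, which overflows or underflows a double; the source's count trick sidesteps that, but the textbook alternative is to sum log probabilities. A sketch under the notebook's names (prior_probability and the normalized conditional_probability), with an assumed small epsilon to guard log(0):

```python
import math

def calculate_log_probability(img, label):
    # log P(y) + sum_i log P(x_i | y): a sum of logs cannot underflow the
    # way a product of hundreds of small factors does.
    log_p = math.log(prior_probability[label])
    for i in range(len(img)):
        log_p += math.log(conditional_probability[label][i][img[i]] + 1e-12)  # epsilon is an assumption
    return log_p
```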
diff --git a/perceptron/binary_perceptron.py b/perceptron/binary_perceptron.py
index e6ff6e0..187ba8c 100644
--- a/perceptron/binary_perceptron.py
+++ b/perceptron/binary_perceptron.py
@@ -12,7 +12,7 @@
 import random
 import time
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
@@ -23,7 +23,7 @@ def __init__(self):
         self.max_iteration = 5000
 
     def predict_(self, x):
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
         return int(wx > 0)
 
     def train(self, features, labels):
@@ -37,7 +37,7 @@ def train(self, features, labels):
         x = list(features[index])
         x.append(1.0)
         y = 2 * labels[index] - 1
-        wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))])
+        wx = sum([self.w[j] * x[j] for j in range(len(self.w))])
 
         if wx * y > 0:
             correct_count += 1
@@ -45,7 +45,7 @@ def train(self, features, labels):
             break
             continue
 
-        for i in xrange(len(self.w)):
+        for i in range(len(self.w)):
             self.w[i] += self.learning_step * (y * x[i])
 
     def predict(self,features):
@@ -59,7 +59,7 @@ def predict(self,features):
 
 if __name__ == '__main__':
 
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -76,19 +76,19 @@ def predict(self,features):
     # print train_features.shape
     time_2 = time.time()
-    print 'read data cost ', time_2 - time_1, ' second', '\n'
+    print('read data cost ', time_2 - time_1, ' second', '\n')
 
-    print 'Start training'
+    print('Start training')
     p = Perceptron()
     p.train(train_features, train_labels)
 
     time_3 = time.time()
-    print 'training cost ', time_3 - time_2, ' second', '\n'
+    print('training cost ', time_3 - time_2, ' second', '\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = p.predict(test_features)
     time_4 = time.time()
-    print 'predicting cost ', time_4 - time_3, ' second', '\n'
+    print('predicting cost ', time_4 - time_3, ' second', '\n')
 
     score = accuracy_score(test_labels, test_predict)
-    print "The accruacy socre is ", score
+    print("The accuracy score is ", score)
diff --git a/svm/__pycache__/generate_dataset.cpython-35.pyc b/svm/__pycache__/generate_dataset.cpython-35.pyc
new file mode 100644
index 0000000..62efb2a
Binary files /dev/null and b/svm/__pycache__/generate_dataset.cpython-35.pyc differ
diff --git a/svm/generate_dataset.py b/svm/generate_dataset.py
index b71102e..cdf8262 100644
--- a/svm/generate_dataset.py
+++ b/svm/generate_dataset.py
@@ -49,7 +49,7 @@ def data_visualization(X,y,title):
 
     size = len(y)
 
-    for i in xrange(size):
+    for i in range(size):
         X_1 = X[0][i]
         X_2 = X[1][i]
@@ -76,7 +76,7 @@ def rebuild_features(features):
     size = len(features[0])
 
     new_features = []
-    for i in xrange(size):
+    for i in range(size):
         new_features.append([features[0][i],features[1][i]])
 
     return new_features
@@ -92,7 +92,7 @@ def generate_dataset(size, noisy = False, visualization = True):
 
     testset_size = int(len(y)*0.333)
 
-    indexes = [i for i in xrange(len(y))]
+    indexes = [i for i in range(len(y))]
     test_indexes = random.sample(indexes,testset_size)
     train_indexes = list(set(indexes)-set(test_indexes))
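The svm.py hunk below wraps `filter(...)` in `list(...)`. In Python 3 both `filter()` and `range()` return lazy objects without list methods, which is exactly the AttributeError the patch's added comment records. A two-line illustration (the values are made up):

```python
alpha, C = [0.0, 0.5, 1200.0], 1000
index_list = list(range(len(alpha)))

lazy = filter(lambda i: 0 < alpha[i] < C, index_list)   # a <filter object>: no .extend()
i1_list_1 = list(lazy)                                  # materialized, as in the hunk below
```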
diff --git a/svm/svm.py b/svm/svm.py
index cb56839..d145a4e 100644
--- a/svm/svm.py
+++ b/svm/svm.py
@@ -12,7 +12,7 @@
 import logging
 import pandas as pd
 
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 
 from generate_dataset import *
@@ -36,7 +36,7 @@ def _init_parameters(self, features, labels):
         self.n = len(features[0])
         self.N = len(features)
         self.alpha = [0.0] * self.N
-        self.E = [self._E_(i) for i in xrange(self.N)]
+        self.E = [self._E_(i) for i in range(self.N)]
         self.C = 1000
         self.Max_Interation = 5000
@@ -63,14 +63,17 @@ def _select_two_parameters(self):
         '''
         按照书上7.4.2选择两个变量
         '''
-        index_list = [i for i in xrange(self.N)]
+        index_list = [i for i in range(self.N)]
 
-        i1_list_1 = filter(lambda i: self.alpha[i] > 0 and self.alpha[i] < self.C, index_list)
+        i1_list_1 = list(filter(lambda i: self.alpha[i] > 0 and self.alpha[i] < self.C, index_list))
         i1_list_2 = list(set(index_list) - set(i1_list_1))
 
         i1_list = i1_list_1
         i1_list.extend(i1_list_2)
-
+        '''
+        Python 3 raises AttributeError: 'range' object has no attribute 'extend';
+        fix: listtemp = list(range(...))
+        '''
         for i in i1_list:
             if self._satisfy_KKT(i):
                 continue
@@ -94,13 +97,13 @@ def _K_(self, x1, x2):
         '''
 
         if self.kernel == 'linear':
-            return sum([x1[k] * x2[k] for k in xrange(self.n)])
+            return sum([x1[k] * x2[k] for k in range(self.n)])
 
         if self.kernel == 'poly':
-            return (sum([x1[k] * x2[k] for k in xrange(self.n)])+1)**3
+            return (sum([x1[k] * x2[k] for k in range(self.n)])+1)**3
 
-        print '没有定义核函数'
+        print('no kernel function defined')
         return 0
 
     def _g_(self, i):
@@ -109,7 +112,7 @@ def _g_(self, i):
         '''
         result = self.b
 
-        for j in xrange(self.N):
+        for j in range(self.N):
             result += self.alpha[j] * self.Y[j] * self._K_(self.X[i], self.X[j])
 
         return result
@@ -122,7 +125,7 @@ def _E_(self, i):
 
     def try_E(self,i):
         result = self.b-self.Y[i]
-        for j in xrange(self.N):
+        for j in range(self.N):
             if self.alpha[j]<0 or self.alpha[j]>self.C:
                 continue
             result += self.Y[j]*self.alpha[j]*self._K_(self.X[i],self.X[j])
@@ -133,7 +136,7 @@ def train(self, features, labels):
 
         self._init_parameters(features, labels)
 
-        for times in xrange(self.Max_Interation):
+        for times in range(self.Max_Interation):
 
             # if self.is_stop():
             #     return
@@ -190,7 +193,7 @@ def train(self, features, labels):
     def _predict_(self,feature):
         result = self.b
 
-        for i in xrange(self.N):
+        for i in range(self.N):
             result += self.alpha[i]*self.Y[i]*self._K_(feature,self.X[i])
 
         if result > 0:
@@ -210,7 +213,7 @@ def predict(self,features):
     logger = logging.getLogger()
     logger.setLevel(logging.DEBUG)
 
-    print 'Start read data'
+    print('Start read data')
 
     time_1 = time.time()
@@ -218,20 +221,20 @@ def predict(self,features):
     train_features, train_labels, test_features, test_labels = generate_dataset(2000,visualization=False)
     time_2 = time.time()
-    print 'read data cost ',time_2 - time_1,' second','\n'
+    print('read data cost ',time_2 - time_1,' second','\n')
 
-    print 'Start training'
+    print('Start training')
     svm = SVM()
     svm.train(train_features, train_labels)
 
     time_3 = time.time()
-    print 'training cost ',time_3 - time_2,' second','\n'
+    print('training cost ',time_3 - time_2,' second','\n')
 
-    print 'Start predicting'
+    print('Start predicting')
     test_predict = svm.predict(test_features)
 
     time_4 = time.time()
-    print 'predicting cost ',time_4 - time_3,' second','\n'
+    print('predicting cost ',time_4 - time_3,' second','\n')
 
     score = accuracy_score(test_labels,test_predict)
-    print "svm1 the accruacy socre is ", score
+    print("svm1 the accuracy score is ", score)
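One further suggestion, not part of this patch: the ported `_K_` still reports an unknown kernel with a print and `return 0`, which silently zeroes every Gram-matrix entry. A hedged sketch of a louder failure mode:

```python
def _K_(self, x1, x2):
    if self.kernel == 'linear':
        return sum(x1[k] * x2[k] for k in range(self.n))
    if self.kernel == 'poly':
        return (sum(x1[k] * x2[k] for k in range(self.n)) + 1) ** 3
    # raising beats returning 0, which corrupts the whole kernel matrix
    raise ValueError('undefined kernel: %r' % self.kernel)
```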