diff --git a/AdaBoost/adaboost.py b/AdaBoost/adaboost.py index 1851f3b..645973f 100644 --- a/AdaBoost/adaboost.py +++ b/AdaBoost/adaboost.py @@ -12,7 +12,7 @@ import numpy as np import pandas as pd -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score sign_time_count = 0 @@ -48,7 +48,7 @@ def _train_less_than_(self): for i in self.indexes: score = 0 - for j in xrange(self.N): + for j in range(self.N): val = -1 if self.X[j] 0) def train(self, features, labels): @@ -37,7 +37,7 @@ def train(self, features, labels): x = list(features[index]) x.append(1.0) y = 2 * labels[index] - 1 - wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))]) + wx = sum([self.w[j] * x[j] for j in range(len(self.w))]) if wx * y > 0: correct_count += 1 @@ -45,7 +45,7 @@ def train(self, features, labels): break continue - for i in xrange(len(self.w)): + for i in range(len(self.w)): self.w[i] += self.learning_step * (y * x[i]) def predict(self,features): @@ -59,7 +59,7 @@ def predict(self,features): if __name__ == '__main__': - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -76,19 +76,19 @@ def predict(self,features): # print train_features.shape time_2 = time.time() - print 'read data cost ', time_2 - time_1, ' second', '\n' + print('read data cost ', time_2 - time_1, ' second', '\n') - print 'Start training' + print('Start training') p = Perceptron() p.train(train_features, train_labels) time_3 = time.time() - print 'training cost ', time_3 - time_2, ' second', '\n' + print('training cost ', time_3 - time_2, ' second', '\n') - print 'Start predicting' + print('Start predicting') test_predict = p.predict(test_features) time_4 = time.time() - print 'predicting cost ', time_4 - time_3, ' second', '\n' + print('predicting cost ', time_4 - time_3, ' second', '\n') score = accuracy_score(test_labels, test_predict) - print "The accruacy socre is ", score + print("The accruacy socre is ", score) diff --git a/logistic_regression/competation.py b/logistic_regression/competation.py index cba8f01..7386ef1 100644 --- a/logistic_regression/competation.py +++ b/logistic_regression/competation.py @@ -11,7 +11,7 @@ from binary_perceptron import Perceptron from logistic_regression import LogisticRegression -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score if __name__ == '__main__': @@ -29,8 +29,8 @@ writer = csv.writer(file('result.csv', 'wb')) - for time in xrange(test_time): - print 'iterater time %d' % time + for time in range(test_time): + print('iterater time %d' % time) train_features, test_features, train_labels, test_labels = train_test_split( imgs, labels, test_size=0.33, random_state=23323) @@ -44,7 +44,7 @@ p_score = accuracy_score(test_labels, p_predict) lr_score = accuracy_score(test_labels, lr_predict) - print 'perceptron accruacy score ', p_score - print 'logistic Regression accruacy score ', lr_score + print('perceptron accruacy score ', p_score) + print('logistic Regression accruacy score ', lr_score) writer.writerow([time,p_score,lr_score]) diff --git a/logistic_regression/logistic_regression.py b/logistic_regression/logistic_regression.py index 84e8233..5da8901 100644 --- a/logistic_regression/logistic_regression.py +++ b/logistic_regression/logistic_regression.py @@ -10,7 +10,7 @@ import random import pandas as pd -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score @@ -21,7 +21,7 @@ def __init__(self): self.max_iteration = 5000 def predict_(self,x): - wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))]) + wx = sum([self.w[j] * x[j] for j in range(len(self.w))]) exp_wx = math.exp(wx) predict1 = exp_wx / (1 + exp_wx) @@ -55,10 +55,10 @@ def train(self,features, labels): time += 1 correct_count = 0 - wx = sum([self.w[i] * x[i] for i in xrange(len(self.w))]) + wx = sum([self.w[i] * x[i] for i in range(len(self.w))]) exp_wx = math.exp(wx) - for i in xrange(len(self.w)): + for i in range(len(self.w)): self.w[i] -= self.learning_step * \ (-y * x[i] + float(x[i] * exp_wx) / float(1 + exp_wx)) @@ -74,7 +74,7 @@ def predict(self,features): return labels if __name__ == "__main__": - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -89,19 +89,19 @@ def predict(self,features): train_features, test_features, train_labels, test_labels = train_test_split(imgs, labels, test_size=0.33, random_state=23323) time_2 = time.time() - print 'read data cost ',time_2 - time_1,' second','\n' + print('read data cost ',time_2 - time_1,' second','\n') - print 'Start training' + print('Start training') lr = LogisticRegression() lr.train(train_features, train_labels) time_3 = time.time() - print 'training cost ',time_3 - time_2,' second','\n' + print('training cost ',time_3 - time_2,' second','\n') - print 'Start predicting' + print('Start predicting') test_predict = lr.predict(test_features) time_4 = time.time() - print 'predicting cost ',time_4 - time_3,' second','\n' + print('predicting cost ',time_4 - time_3,' second','\n') score = accuracy_score(test_labels,test_predict) - print "The accruacy socre is ", score + print("The accruacy socre is ", score) diff --git a/maxENT/maxENT.py b/maxENT/maxENT.py index 3acad95..e5ba71e 100644 --- a/maxENT/maxENT.py +++ b/maxENT/maxENT.py @@ -15,7 +15,7 @@ from collections import defaultdict -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score @@ -47,7 +47,7 @@ def cal_Pxy_Px(self, X, Y): self.Pxy = defaultdict(int) self.Px = defaultdict(int) - for i in xrange(len(X)): + for i in range(len(X)): x_, y = X[i], Y[i] self.Y_.add(y) @@ -60,7 +60,7 @@ def cal_EPxy(self): 计算书中82页最下面那个期望 ''' self.EPxy = defaultdict(float) - for id in xrange(self.n): + for id in range(self.n): (x, y) = self.id2xy[id] self.EPxy[id] = float(self.Pxy[(x, y)]) / float(self.N) @@ -84,7 +84,7 @@ def cal_EPx(self): ''' 计算书83页最上面那个期望 ''' - self.EPx = [0.0 for i in xrange(self.n)] + self.EPx = [0.0 for i in range(self.n)] for i, X in enumerate(self.X_): Pyxs = self.cal_probality(X) @@ -104,19 +104,19 @@ def train(self, X, Y): self.w = [0.0 for i in range(self.n)] max_iteration = 1000 - for times in xrange(max_iteration): - print 'iterater times %d' % times + for times in range(max_iteration): + print('iterater times %d' % times) sigmas = [] self.cal_EPx() - for i in xrange(self.n): + for i in range(self.n): sigma = 1 / self.M * math.log(self.EPxy[i] / self.EPx[i]) sigmas.append(sigma) # if len(filter(lambda x: abs(x) >= 0.01, sigmas)) == 0: # break - self.w = [self.w[i] + sigmas[i] for i in xrange(self.n)] + self.w = [self.w[i] + sigmas[i] for i in range(self.n)] def predict(self, testset): results = [] @@ -142,7 +142,7 @@ def rebuild_features(features): if __name__ == "__main__": - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -160,19 +160,19 @@ def rebuild_features(features): test_features = rebuild_features(test_features) time_2 = time.time() - print 'read data cost ', time_2 - time_1, ' second', '\n' + print('read data cost ', time_2 - time_1, ' second', '\n') - print 'Start training' + print('Start training') met = MaxEnt() met.train(train_features, train_labels) time_3 = time.time() - print 'training cost ', time_3 - time_2, ' second', '\n' + print('training cost ', time_3 - time_2, ' second', '\n') - print 'Start predicting' + print('Start predicting') test_predict = met.predict(test_features) time_4 = time.time() - print 'predicting cost ', time_4 - time_3, ' second', '\n' + print('predicting cost ', time_4 - time_3, ' second', '\n') score = accuracy_score(test_labels, test_predict) - print "The accruacy socre is ", score + print("The accruacy socre is ", score) diff --git a/naive_bayes/naive_bayes.py b/naive_bayes/naive_bayes.py index 07c2091..099e54d 100644 --- a/naive_bayes/naive_bayes.py +++ b/naive_bayes/naive_bayes.py @@ -6,13 +6,13 @@ import random import time -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score # 二值化 def binaryzation(img): cv_img = img.astype(np.uint8) - cv2.threshold(cv_img,50,1,cv2.cv.CV_THRESH_BINARY_INV,cv_img) + cv2.threshold(cv_img,50,1,cv2.THRESH_BINARY_INV,cv_img) return cv_img def Train(trainset,train_labels): @@ -83,7 +83,7 @@ def Predict(testset,prior_probability,conditional_probability): if __name__ == '__main__': - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -99,17 +99,17 @@ def Predict(testset,prior_probability,conditional_probability): # print train_features.shape time_2 = time.time() - print 'read data cost ',time_2 - time_1,' second','\n' + print('read data cost ',time_2 - time_1,' second','\n') - print 'Start training' + print('Start training') prior_probability,conditional_probability = Train(train_features,train_labels) time_3 = time.time() - print 'training cost ',time_3 - time_2,' second','\n' + print('training cost ',time_3 - time_2,' second','\n') - print 'Start predicting' + print('Start predicting') test_predict = Predict(test_features,prior_probability,conditional_probability) time_4 = time.time() - print 'predicting cost ',time_4 - time_3,' second','\n' + print('predicting cost ',time_4 - time_3,' second','\n') score = accuracy_score(test_labels,test_predict) - print "The accruacy socre is ", score \ No newline at end of file + print("The accruacy socre is ", score) \ No newline at end of file diff --git a/perceptron/binary_perceptron.py b/perceptron/binary_perceptron.py index e6ff6e0..187ba8c 100644 --- a/perceptron/binary_perceptron.py +++ b/perceptron/binary_perceptron.py @@ -12,7 +12,7 @@ import random import time -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score @@ -23,7 +23,7 @@ def __init__(self): self.max_iteration = 5000 def predict_(self, x): - wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))]) + wx = sum([self.w[j] * x[j] for j in range(len(self.w))]) return int(wx > 0) def train(self, features, labels): @@ -37,7 +37,7 @@ def train(self, features, labels): x = list(features[index]) x.append(1.0) y = 2 * labels[index] - 1 - wx = sum([self.w[j] * x[j] for j in xrange(len(self.w))]) + wx = sum([self.w[j] * x[j] for j in range(len(self.w))]) if wx * y > 0: correct_count += 1 @@ -45,7 +45,7 @@ def train(self, features, labels): break continue - for i in xrange(len(self.w)): + for i in range(len(self.w)): self.w[i] += self.learning_step * (y * x[i]) def predict(self,features): @@ -59,7 +59,7 @@ def predict(self,features): if __name__ == '__main__': - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -76,19 +76,19 @@ def predict(self,features): # print train_features.shape time_2 = time.time() - print 'read data cost ', time_2 - time_1, ' second', '\n' + print('read data cost ', time_2 - time_1, ' second', '\n') - print 'Start training' + print('Start training') p = Perceptron() p.train(train_features, train_labels) time_3 = time.time() - print 'training cost ', time_3 - time_2, ' second', '\n' + print('training cost ', time_3 - time_2, ' second', '\n') - print 'Start predicting' + print('Start predicting') test_predict = p.predict(test_features) time_4 = time.time() - print 'predicting cost ', time_4 - time_3, ' second', '\n' + print('predicting cost ', time_4 - time_3, ' second', '\n') score = accuracy_score(test_labels, test_predict) - print "The accruacy socre is ", score + print("The accruacy socre is ", score) diff --git a/svm/__pycache__/generate_dataset.cpython-35.pyc b/svm/__pycache__/generate_dataset.cpython-35.pyc new file mode 100644 index 0000000..62efb2a Binary files /dev/null and b/svm/__pycache__/generate_dataset.cpython-35.pyc differ diff --git a/svm/generate_dataset.py b/svm/generate_dataset.py index b71102e..cdf8262 100644 --- a/svm/generate_dataset.py +++ b/svm/generate_dataset.py @@ -49,7 +49,7 @@ def data_visualization(X,y,title): size = len(y) - for i in xrange(size): + for i in range(size): X_1 = X[0][i] X_2 = X[1][i] @@ -76,7 +76,7 @@ def rebuild_features(features): size = len(features[0]) new_features = [] - for i in xrange(size): + for i in range(size): new_features.append([features[0][i],features[1][i]]) return new_features @@ -92,7 +92,7 @@ def generate_dataset(size, noisy = False, visualization = True): testset_size = int(len(y)*0.333) - indexes = [i for i in xrange(len(y))] + indexes = [i for i in range(len(y))] test_indexes = random.sample(indexes,testset_size) train_indexes = list(set(indexes)-set(test_indexes)) diff --git a/svm/svm.py b/svm/svm.py index cb56839..d145a4e 100644 --- a/svm/svm.py +++ b/svm/svm.py @@ -12,7 +12,7 @@ import logging import pandas as pd -from sklearn.cross_validation import train_test_split +from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score from generate_dataset import * @@ -36,7 +36,7 @@ def _init_parameters(self, features, labels): self.n = len(features[0]) self.N = len(features) self.alpha = [0.0] * self.N - self.E = [self._E_(i) for i in xrange(self.N)] + self.E = [self._E_(i) for i in range(self.N)] self.C = 1000 self.Max_Interation = 5000 @@ -63,14 +63,17 @@ def _select_two_parameters(self): ''' 按照书上7.4.2选择两个变量 ''' - index_list = [i for i in xrange(self.N)] + index_list = [i for i in range(self.N)] - i1_list_1 = filter(lambda i: self.alpha[i] > 0 and self.alpha[i] < self.C, index_list) + i1_list_1 = list(filter(lambda i: self.alpha[i] > 0 and self.alpha[i] < self.C, index_list)) i1_list_2 = list(set(index_list) - set(i1_list_1)) i1_list = i1_list_1 i1_list.extend(i1_list_2) - + ''' + python 提示AttributeError: 'range' object has no attribute 'extend' + key:listtemp=list(range(...)) + ''' for i in i1_list: if self._satisfy_KKT(i): continue @@ -94,13 +97,13 @@ def _K_(self, x1, x2): ''' if self.kernel == 'linear': - return sum([x1[k] * x2[k] for k in xrange(self.n)]) + return sum([x1[k] * x2[k] for k in range(self.n)]) if self.kernel == 'poly': - return (sum([x1[k] * x2[k] for k in xrange(self.n)])+1)**3 + return (sum([x1[k] * x2[k] for k in range(self.n)])+1)**3 - print '没有定义核函数' + print('没有定义核函数') return 0 def _g_(self, i): @@ -109,7 +112,7 @@ def _g_(self, i): ''' result = self.b - for j in xrange(self.N): + for j in range(self.N): result += self.alpha[j] * self.Y[j] * self._K_(self.X[i], self.X[j]) return result @@ -122,7 +125,7 @@ def _E_(self, i): def try_E(self,i): result = self.b-self.Y[i] - for j in xrange(self.N): + for j in range(self.N): if self.alpha[j]<0 or self.alpha[j]>self.C: continue result += self.Y[j]*self.alpha[j]*self._K_(self.X[i],self.X[j]) @@ -133,7 +136,7 @@ def train(self, features, labels): self._init_parameters(features, labels) - for times in xrange(self.Max_Interation): + for times in range(self.Max_Interation): # if self.is_stop(): # return @@ -190,7 +193,7 @@ def train(self, features, labels): def _predict_(self,feature): result = self.b - for i in xrange(self.N): + for i in range(self.N): result += self.alpha[i]*self.Y[i]*self._K_(feature,self.X[i]) if result > 0: @@ -210,7 +213,7 @@ def predict(self,features): logger = logging.getLogger() logger.setLevel(logging.DEBUG) - print 'Start read data' + print('Start read data') time_1 = time.time() @@ -218,20 +221,20 @@ def predict(self,features): train_features, train_labels, test_features, test_labels = generate_dataset(2000,visualization=False) time_2 = time.time() - print 'read data cost ',time_2 - time_1,' second','\n' + print('read data cost ',time_2 - time_1,' second','\n') - print 'Start training' + print('Start training') svm = SVM() svm.train(train_features, train_labels) time_3 = time.time() - print 'training cost ',time_3 - time_2,' second','\n' + print('training cost ',time_3 - time_2,' second','\n') - print 'Start predicting' + print('Start predicting') test_predict = svm.predict(test_features) time_4 = time.time() - print 'predicting cost ',time_4 - time_3,' second','\n' + print('predicting cost ',time_4 - time_3,' second','\n') score = accuracy_score(test_labels,test_predict) - print "svm1 the accruacy socre is ", score + print("svm1 the accruacy socre is ", score)