| 
 | 1 | +# encoding=utf8  | 
 | 2 | + | 
 | 3 | +import math  | 
 | 4 | +import pandas as pd  | 
 | 5 | +import numpy as np  | 
 | 6 | +import random  | 
 | 7 | +import time  | 
 | 8 | + | 
 | 9 | +from sklearn.model_selection import train_test_split  | 
 | 10 | +from sklearn.metrics import accuracy_score  | 
 | 11 | + | 
 | 12 | + | 
 | 13 | +class Softmax(object):  | 
 | 14 | + | 
 | 15 | +    def __init__(self):  | 
 | 16 | +        self.learning_step = 0.000001           # 学习速率  | 
 | 17 | +        self.max_iteration = 100000             # 最大迭代次数  | 
 | 18 | +        self.weight_lambda = 0.01               # 衰退权重  | 
 | 19 | + | 
 | 20 | +    def cal_e(self,x,l):  | 
 | 21 | +        '''  | 
 | 22 | +        计算博客中的公式3  | 
 | 23 | +        '''  | 
 | 24 | + | 
 | 25 | +        theta_l = self.w[l]  | 
 | 26 | +        product = np.dot(theta_l,x)  | 
 | 27 | + | 
 | 28 | +        return math.exp(product)  | 
 | 29 | + | 
 | 30 | +    def cal_probability(self,x,j):  | 
 | 31 | +        '''  | 
 | 32 | +        计算博客中的公式2  | 
 | 33 | +        '''  | 
 | 34 | + | 
 | 35 | +        molecule = self.cal_e(x,j)  | 
 | 36 | +        denominator = sum([self.cal_e(x,i) for i in range(self.k)])  | 
 | 37 | + | 
 | 38 | +        return molecule/denominator  | 
 | 39 | + | 
 | 40 | + | 
 | 41 | +    def cal_partial_derivative(self,x,y,j):  | 
 | 42 | +        '''  | 
 | 43 | +        计算博客中的公式1  | 
 | 44 | +        '''  | 
 | 45 | + | 
 | 46 | +        first = int(y==j)                           # 计算示性函数  | 
 | 47 | +        second = self.cal_probability(x,j)          # 计算后面那个概率  | 
 | 48 | + | 
 | 49 | +        return -x*(first-second) + self.weight_lambda*self.w[j]  | 
 | 50 | + | 
 | 51 | +    def predict_(self, x):  | 
 | 52 | +        result = np.dot(self.w,x)  | 
 | 53 | +        row, column = result.shape  | 
 | 54 | + | 
 | 55 | +        # 找最大值所在的列  | 
 | 56 | +        _positon = np.argmax(result)  | 
 | 57 | +        m, n = divmod(_positon, column)  | 
 | 58 | + | 
 | 59 | +        return m  | 
 | 60 | + | 
 | 61 | +    def train(self, features, labels):  | 
 | 62 | +        self.k = len(set(labels))  | 
 | 63 | + | 
 | 64 | +        self.w = np.zeros((self.k,len(features[0])+1))  | 
 | 65 | +        time = 0  | 
 | 66 | + | 
 | 67 | +        while time < self.max_iteration:  | 
 | 68 | +            print('loop %d' % time)  | 
 | 69 | +            time += 1  | 
 | 70 | +            index = random.randint(0, len(labels) - 1)  | 
 | 71 | + | 
 | 72 | +            x = features[index]  | 
 | 73 | +            y = labels[index]  | 
 | 74 | + | 
 | 75 | +            x = list(x)  | 
 | 76 | +            x.append(1.0)  | 
 | 77 | +            x = np.array(x)  | 
 | 78 | + | 
 | 79 | +            derivatives = [self.cal_partial_derivative(x,y,j) for j in range(self.k)]  | 
 | 80 | + | 
 | 81 | +            for j in range(self.k):  | 
 | 82 | +                self.w[j] -= self.learning_step * derivatives[j]  | 
 | 83 | + | 
 | 84 | +    def predict(self,features):  | 
 | 85 | +        labels = []  | 
 | 86 | +        for feature in features:  | 
 | 87 | +            x = list(feature)  | 
 | 88 | +            x.append(1)  | 
 | 89 | + | 
 | 90 | +            x = np.matrix(x)  | 
 | 91 | +            x = np.transpose(x)  | 
 | 92 | + | 
 | 93 | +            labels.append(self.predict_(x))  | 
 | 94 | +        return labels  | 
 | 95 | + | 
 | 96 | + | 
 | 97 | +if __name__ == '__main__':  | 
 | 98 | + | 
 | 99 | +    print('Start read data')  | 
 | 100 | + | 
 | 101 | +    time_1 = time.time()  | 
 | 102 | + | 
 | 103 | +    raw_data = pd.read_csv('../data/train.csv', header=0)  | 
 | 104 | +    data = raw_data.values  | 
 | 105 | + | 
 | 106 | +    imgs = data[0::, 1::]  | 
 | 107 | +    labels = data[::, 0]  | 
 | 108 | + | 
 | 109 | +    # 选取 2/3 数据作为训练集, 1/3 数据作为测试集  | 
 | 110 | +    train_features, test_features, train_labels, test_labels = train_test_split(  | 
 | 111 | +        imgs, labels, test_size=0.33, random_state=23323)  | 
 | 112 | +    # print train_features.shape  | 
 | 113 | +    # print train_features.shape  | 
 | 114 | + | 
 | 115 | +    time_2 = time.time()  | 
 | 116 | +    print('read data cost '+ str(time_2 - time_1)+' second')  | 
 | 117 | + | 
 | 118 | +    print('Start training')  | 
 | 119 | +    p = Softmax()  | 
 | 120 | +    p.train(train_features, train_labels)  | 
 | 121 | + | 
 | 122 | +    time_3 = time.time()  | 
 | 123 | +    print('training cost '+ str(time_3 - time_2)+' second')  | 
 | 124 | + | 
 | 125 | +    print('Start predicting')  | 
 | 126 | +    test_predict = p.predict(test_features)  | 
 | 127 | +    time_4 = time.time()  | 
 | 128 | +    print('predicting cost ' + str(time_4 - time_3) +' second')  | 
 | 129 | + | 
 | 130 | +    score = accuracy_score(test_labels, test_predict)  | 
 | 131 | +    print("The accruacy socre is " + str(score))  | 
0 commit comments