Commit 5bd740c

LogisticRegession: logistic regression analysis and its Python implementation
1 parent c7be576 commit 5bd740c

File tree

5 files changed: 639 additions, 0 deletions
Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
#coding:utf-8
'''
Created on 2016/4/24

@author: Gamer Think
'''

from numpy import *

# Load the data set: each line of ex1.txt is "x1 x2 label"
def loadDataSet():
    dataMat = []
    labelMat = []
    fp = open("ex1.txt")
    for line in fp.readlines():
        lineArr = line.strip().split()  # split on whitespace
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])  # prepend the constant bias term
        labelMat.append(int(lineArr[2]))

    return dataMat, labelMat

# Sigmoid function
def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))

# Batch gradient ascent for the best regression coefficients
def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)  # convert the array to a matrix
    labelMat = mat(classLabels).transpose()
    m, n = shape(dataMatrix)  # number of rows and columns
    alpha = 0.001  # step size
    maxCycles = 500  # maximum number of iterations
    weights = ones((n, 1))  # initialise the regression coefficients
    for i in range(maxCycles):
        # gradient step, following the original book's code
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error

    return weights

# Stochastic gradient ascent for the regression coefficients
def stocGradAscent0(dataMatrix, labelMat):
    dataMatrix = array(dataMatrix)
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = labelMat[i] - h
        weights = weights + alpha * error * dataMatrix[i]

    return weights


# Improved stochastic gradient ascent
def stocGradAscent1(dataMatrix, labelMat, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)
    for i in range(numIter):
        dataIndex = list(range(m))  # indices of samples not yet used in this pass
        for j in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # step size shrinks as training progresses
            randIndex = int(random.uniform(0, len(dataIndex)))
            sampleIndex = dataIndex[randIndex]  # pick a sample that has not been used yet
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = labelMat[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
            del(dataIndex[randIndex])

    return weights

# Analyse the data and plot the decision boundary
def plotBestFit(wei, dataMatrix, labelMat):
    import matplotlib.pyplot as plt
    weights = wei  # weight vector (pass gradAscent's result through .getA())
    dataArr = array(dataMatrix)  # convert the matrix to an array
    n = shape(dataMatrix)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []

    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c="green")
    x = arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x) / weights[2]  # boundary where w0 + w1*x1 + w2*x2 = 0
    ax.plot(x, y)
    plt.xlabel("x1")  # X-axis label
    plt.ylabel("x2")  # Y-axis label
    plt.show()


if __name__ == "__main__":
    dataMatrix, labelMat = loadDataSet()
    # batch gradient ascent
    # weight = gradAscent(dataMatrix, labelMat)
    # print(weight)
    # plotBestFit(weight.getA(), dataMatrix, labelMat)

    # stochastic gradient ascent
    # weight = stocGradAscent0(dataMatrix, labelMat)
    # print(weight)
    # plotBestFit(weight, dataMatrix, labelMat)

    # improved stochastic gradient ascent
    weight = stocGradAscent1(array(dataMatrix), labelMat)
    print(weight)
    plotBestFit(weight, dataMatrix, labelMat)
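
A quick way to sanity-check the module is to fit the batch version and measure accuracy on the training data itself. The sketch below assumes the file above is saved as LogisticRegession.py (the name the horse-colic script imports) next to ex1.txt; it is illustrative only, not part of the commit.

from numpy import array
import LogisticRegession as lr

dataMat, labelMat = lr.loadDataSet()
weights = lr.gradAscent(dataMat, labelMat).getA().flatten()  # column matrix -> plain 1-D array

# classify each training point with a 0.5 threshold on the sigmoid output
correct = 0
for x, y in zip(array(dataMat), labelMat):
    pred = 1 if lr.sigmoid(sum(x * weights)) > 0.5 else 0
    correct += (pred == y)

print("training accuracy: %.2f" % (correct / float(len(labelMat))))
# the plotted boundary is the line w0 + w1*x1 + w2*x2 = 0, i.e. where sigmoid(...) = 0.5
print("boundary: %.3f + %.3f*x1 + %.3f*x2 = 0" % tuple(weights))
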
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
#coding:utf-8
'''
Created on 2016/4/25

@author: Gamer Think
'''
import LogisticRegession as lr
from numpy import *

# Classify a single example of the binary classification problem
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0

# Train and test
def colicTest():
    frTrain = open('horseColicTraining.txt'); frTest = open('horseColicTest.txt')
    trainingSet = []; trainingLabels = []
    # train the regression model
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabels.append(float(currLine[21]))
    trainWeights = lr.stocGradAscent1(array(trainingSet), trainingLabels, 1000)
    errorCount = 0; numTestVec = 0.0
    # test the regression model
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]):
            errorCount += 1
    errorRate = (float(errorCount) / numTestVec)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate

def multiTest():
    numTests = 10
    errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print("after %d iterations the average error rate is: %f" % (numTests, errorSum / float(numTests)))


if __name__ == "__main__":
    multiTest()
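
Because stocGradAscent1 visits the samples in random order, each call to colicTest can end with slightly different weights, so multiTest averages the error rate over ten runs. A minimal sketch of the thresholding step, using a hypothetical script name (colicTest.py) and made-up numbers purely for illustration:

from numpy import array
import colicTest as ct          # assuming the script above is saved as colicTest.py

fakeWeights = array([0.5, -1.2, 0.8])   # hypothetical coefficients
fakeSample = array([1.0, 0.3, 2.0])     # hypothetical feature vector
print(ct.classifyVector(fakeSample, fakeWeights))  # 1.0 if sigmoid(w·x) > 0.5, else 0.0
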

Logistic Regession/ex1.txt

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1
0.667394 12.741452 0
-2.460150 6.866805 1
0.569411 9.548755 0
-0.026632 10.427743 0
0.850433 6.920334 1
1.347183 13.175500 0
1.176813 3.167020 1
-1.781871 9.097953 0
-0.566606 5.749003 1
0.931635 1.589505 1
-0.024205 6.151823 1
-0.036453 2.690988 1
-0.196949 0.444165 1
1.014459 5.754399 1
1.985298 3.230619 1
-1.693453 -0.557540 1
-0.576525 11.778922 0
-0.346811 -1.678730 1
-2.124484 2.672471 1
1.217916 9.597015 0
-0.733928 9.098687 0
-3.642001 -1.618087 1
0.315985 3.523953 1
1.416614 9.619232 0
-0.386323 3.989286 1
0.556921 8.294984 1
1.224863 11.587360 0
-1.347803 -2.406051 1
1.196604 4.951851 1
0.275221 9.543647 0
0.470575 9.332488 0
-1.889567 9.542662 0
-1.527893 12.150579 0
-1.185247 11.309318 0
-0.445678 3.297303 1
1.042222 6.105155 1
-0.618787 10.320986 0
1.152083 0.548467 1
0.828534 2.676045 1
-1.237728 10.549033 0
-0.683565 -2.166125 1
0.229456 5.921938 1
-0.959885 11.555336 0
0.492911 10.993324 0
0.184992 8.721488 0
-0.355715 10.325976 0
-0.397822 8.058397 0
0.824839 13.730343 0
1.507278 5.027866 1
0.099671 6.835839 1
-0.344008 10.717485 0
1.785928 7.718645 1
-0.918801 11.560217 0
-0.364009 4.747300 1
-0.841722 4.119083 1
0.490426 1.960539 1
-0.007194 9.075792 0
0.356107 12.447863 0
0.342578 12.281162 0
-0.810823 -1.466018 1
2.530777 6.476801 1
1.296683 11.607559 0
0.475487 12.040035 0
-0.783277 11.009725 0
0.074798 11.023650 0
-1.337472 0.468339 1
-0.102781 13.763651 0
-0.147324 2.874846 1
0.518389 9.887035 0
1.015399 7.571882 0
-1.658086 -0.027255 1
1.319944 2.171228 1
2.056216 5.019981 1
-0.851633 4.375691 1
-1.510047 6.061992 0
-1.076637 -3.181888 1
1.821096 10.283990 0
3.010150 8.401766 1
-1.099458 1.688274 1
-0.834872 -1.733869 1
-0.846637 3.849075 1
1.400102 12.628781 0
1.752842 5.468166 1
0.078557 0.059736 1
0.089392 -0.715300 1
1.825662 12.693808 0
0.197445 9.744638 0
0.126117 0.922311 1
-0.679797 1.220530 1
0.677983 2.556666 1
0.761349 10.693862 0
-2.168791 0.143632 1
1.388610 9.341997 0
0.317029 14.739025 0
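
Each row of ex1.txt holds two feature values and a 0/1 class label, and loadDataSet prepends a constant 1.0 so that weights[0] acts as the intercept. A minimal check of that parsing, hard-coding the first row above instead of reading the file:

line = "-0.017612 14.053064 0"
lineArr = line.strip().split()
row = [1.0, float(lineArr[0]), float(lineArr[1])]   # [1.0, -0.017612, 14.053064]
label = int(lineArr[2])                             # 0
print(row, label)
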
Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
2 1 38.50 54 20 0 1 2 2 3 4 1 2 2 5.90 0 2 42.00 6.30 0 0 1
2 1 37.60 48 36 0 0 1 1 0 3 0 0 0 0 0 0 44.00 6.30 1 5.00 1
1 1 37.7 44 28 0 4 3 2 5 4 4 1 1 0 3 5 45 70 3 2 1
1 1 37 56 24 3 1 4 2 4 4 3 1 1 0 0 0 35 61 3 2 0
2 1 38.00 42 12 3 0 3 1 1 0 1 0 0 0 0 2 37.00 5.80 0 0 1
1 1 0 60 40 3 0 1 1 0 4 0 3 2 0 0 5 42 72 0 0 1
2 1 38.40 80 60 3 2 2 1 3 2 1 2 2 0 1 1 54.00 6.90 0 0 1
2 1 37.80 48 12 2 1 2 1 3 0 1 2 0 0 2 0 48.00 7.30 1 0 1
2 1 37.90 45 36 3 3 3 2 2 3 1 2 1 0 3 0 33.00 5.70 3 0 1
2 1 39.00 84 12 3 1 5 1 2 4 2 1 2 7.00 0 4 62.00 5.90 2 2.20 0
2 1 38.20 60 24 3 1 3 2 3 3 2 3 3 0 4 4 53.00 7.50 2 1.40 1
1 1 0 140 0 0 0 4 2 5 4 4 1 1 0 0 5 30 69 0 0 0
1 1 37.90 120 60 3 3 3 1 5 4 4 2 2 7.50 4 5 52.00 6.60 3 1.80 0
2 1 38.00 72 36 1 1 3 1 3 0 2 2 1 0 3 5 38.00 6.80 2 2.00 1
2 9 38.00 92 28 1 1 2 1 1 3 2 3 0 7.20 0 0 37.00 6.10 1 1.10 1
1 1 38.30 66 30 2 3 1 1 2 4 3 3 2 8.50 4 5 37.00 6.00 0 0 1
2 1 37.50 48 24 3 1 1 1 2 1 0 1 1 0 3 2 43.00 6.00 1 2.80 1
1 1 37.50 88 20 2 3 3 1 4 3 3 0 0 0 0 0 35.00 6.40 1 0 0
2 9 0 150 60 4 4 4 2 5 4 4 0 0 0 0 0 0 0 0 0 0
1 1 39.7 100 30 0 0 6 2 4 4 3 1 0 0 4 5 65 75 0 0 0
1 1 38.30 80 0 3 3 4 2 5 4 3 2 1 0 4 4 45.00 7.50 2 4.60 1
2 1 37.50 40 32 3 1 3 1 3 2 3 2 1 0 0 5 32.00 6.40 1 1.10 1
1 1 38.40 84 30 3 1 5 2 4 3 3 2 3 6.50 4 4 47.00 7.50 3 0 0
1 1 38.10 84 44 4 0 4 2 5 3 1 1 3 5.00 0 4 60.00 6.80 0 5.70 0
2 1 38.70 52 0 1 1 1 1 1 3 1 0 0 0 1 3 4.00 74.00 0 0 1
2 1 38.10 44 40 2 1 3 1 3 3 1 0 0 0 1 3 35.00 6.80 0 0 1
2 1 38.4 52 20 2 1 3 1 1 3 2 2 1 0 3 5 41 63 1 1 1
1 1 38.20 60 0 1 0 3 1 2 1 1 1 1 0 4 4 43.00 6.20 2 3.90 1
2 1 37.70 40 18 1 1 1 0 3 2 1 1 1 0 3 3 36.00 3.50 0 0 1
1 1 39.1 60 10 0 1 1 0 2 3 0 0 0 0 4 4 0 0 0 0 1
2 1 37.80 48 16 1 1 1 1 0 1 1 2 1 0 4 3 43.00 7.50 0 0 1
1 1 39.00 120 0 4 3 5 2 2 4 3 2 3 8.00 0 0 65.00 8.20 3 4.60 1
1 1 38.20 76 0 2 3 2 1 5 3 3 1 2 6.00 1 5 35.00 6.50 2 0.90 1
2 1 38.30 88 0 0 0 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 1 38.00 80 30 3 3 3 1 0 0 0 0 0 6.00 0 0 48.00 8.30 0 4.30 1
1 1 0 0 0 3 1 1 1 2 3 3 1 3 6.00 4 4 0 0 2 0 0
1 1 37.60 40 0 1 1 1 1 1 1 1 0 0 0 1 1 0 0 2 2.10 1
2 1 37.50 44 0 1 1 1 1 3 3 2 0 0 0 0 0 45.00 5.80 2 1.40 1
2 1 38.2 42 16 1 1 3 1 1 3 1 0 0 0 1 0 35 60 1 1 1
2 1 38 56 44 3 3 3 0 0 1 1 2 1 0 4 0 47 70 2 1 1
2 1 38.30 45 20 3 3 2 2 2 4 1 2 0 0 4 0 0 0 0 0 1
1 1 0 48 96 1 1 3 1 0 4 1 2 1 0 1 4 42.00 8.00 1 0 1
1 1 37.70 55 28 2 1 2 1 2 3 3 0 3 5.00 4 5 0 0 0 0 1
2 1 36.00 100 20 4 3 6 2 2 4 3 1 1 0 4 5 74.00 5.70 2 2.50 0
1 1 37.10 60 20 2 0 4 1 3 0 3 0 2 5.00 3 4 64.00 8.50 2 0 1
2 1 37.10 114 40 3 0 3 2 2 2 1 0 0 0 0 3 32.00 0 3 6.50 1
1 1 38.1 72 30 3 3 3 1 4 4 3 2 1 0 3 5 37 56 3 1 1
1 1 37.00 44 12 3 1 1 2 1 1 1 0 0 0 4 2 40.00 6.70 3 8.00 1
1 1 38.6 48 20 3 1 1 1 4 3 1 0 0 0 3 0 37 75 0 0 1
1 1 0 82 72 3 1 4 1 2 3 3 0 3 0 4 4 53 65 3 2 0
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 37.8 60 16 1 1 3 1 2 3 2 1 2 0 3 0 41 73 0 0 0
1 1 38.7 34 30 2 0 3 1 2 3 0 0 0 0 0 0 33 69 0 2 0
1 1 0 36 12 1 1 1 1 1 2 1 1 1 0 1 5 44.00 0 0 0 1
2 1 38.30 44 60 0 0 1 1 0 0 0 0 0 0 0 0 6.40 36.00 0 0 1
2 1 37.40 54 18 3 0 1 1 3 4 3 2 2 0 4 5 30.00 7.10 2 0 1
1 1 0 0 0 4 3 0 2 2 4 1 0 0 0 0 0 54 76 3 2 1
1 1 36.6 48 16 3 1 3 1 4 1 1 1 1 0 0 0 27 56 0 0 0
1 1 38.5 90 0 1 1 3 1 3 3 3 2 3 2 4 5 47 79 0 0 1
1 1 0 75 12 1 1 4 1 5 3 3 0 3 5.80 0 0 58.00 8.50 1 0 1
2 1 38.20 42 0 3 1 1 1 1 1 2 2 1 0 3 2 35.00 5.90 2 0 1
1 9 38.20 78 60 4 4 6 0 3 3 3 0 0 0 1 0 59.00 5.80 3 3.10 0
2 1 38.60 60 30 1 1 3 1 4 2 2 1 1 0 0 0 40.00 6.00 1 0 1
2 1 37.80 42 40 1 1 1 1 1 3 1 0 0 0 3 3 36.00 6.20 0 0 1
1 1 38 60 12 1 1 2 1 2 1 1 1 1 0 1 4 44 65 3 2 0
2 1 38.00 42 12 3 0 3 1 1 1 1 0 0 0 0 1 37.00 5.80 0 0 1
2 1 37.60 88 36 3 1 1 1 3 3 2 1 3 1.50 0 0 44.00 6.00 0 0 0

0 commit comments
