Skip to content

Commit f6b85ec

Browse files
committed
add 李航统计学习方法
1 parent c856ac3 commit f6b85ec

File tree

20 files changed

+1076
-0
lines changed

20 files changed

+1076
-0
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
"""
2+
@ jetou
3+
@ AdaBoost algorithm
4+
@ date 2017 11 19
5+
6+
"""
7+
from weaker_classifier import *
8+
import math
9+
10+
class adaBoost(object):
    """AdaBoost ensemble of weighted weak classifiers for +1/-1 labels.

    Each round fits a ``weake_classifier`` on the current sample weights,
    gives it a vote ``alpha`` derived from its weighted error, and then
    re-weights the samples so that misclassified ones count more.

    Parameters
    ----------
    feature : array-like, one row per sample.
    label : array-like of +1/-1 values, one per sample (flat or column).
    Epsilon : training error rate at or below which ``train`` stops.
    """

    # Weak-learner error is clamped to [_ERROR_FLOOR, 1 - _ERROR_FLOOR] so
    # the log-odds in __alpha__ stay finite for a perfect (or fully wrong)
    # weak learner.
    _ERROR_FLOOR = 1e-10

    def __init__(self, feature, label, Epsilon=0):
        self.feature = np.array(feature)
        self.label = np.array(label)
        self.Epsilon = Epsilon
        self.N = self.feature.shape[0]
        self.error = 1          # weighted error of the latest weak learner
        self.count_error = 1    # training error rate of the whole ensemble

        self.alpha = []         # vote of each weak learner
        self.classifier = []    # fitted weak learners, same order as alpha
        self.W = [1.0 / self.N for i in range(self.N)]

    def sign(self, value):
        """Return 1, -1 or 0 according to the sign of ``value``."""
        if value > 0:
            return 1
        if value < 0:
            return -1
        return 0

    def update_W_(self):
        """Re-weight the samples after the most recent weak learner.

        Computes ``w_i * exp(-alpha * y_i * G(x_i))`` for every sample and
        normalises the result to sum to one.
        """
        labels = self.label.reshape(-1)
        # One batched call instead of two calls per sample.
        votes = np.asarray(self.classifier[-1].prediction(self.feature)).reshape(-1)
        weights = np.asarray(self.W, dtype=float).reshape(-1)

        scaled = weights * np.exp(-self.alpha[-1] * labels * votes)
        # Stored as a plain list: that is the type the weak learner has
        # always been handed.
        self.W = (scaled / scaled.sum()).tolist()

    def __alpha__(self):
        """Append the vote ``0.5 * ln((1 - e) / e)`` for the current error."""
        floor = self._ERROR_FLOOR
        error = min(max(float(self.error), floor), 1.0 - floor)
        self.alpha.append(math.log((1 - error) / error) / 2)

    def prediction(self, label):
        """Classify samples with the weighted vote of all weak learners.

        ``label`` holds the feature rows to classify (the parameter name is
        kept for backward compatibility). Returns a list with one of
        1, -1 or 0 per row; 0 means the vote was tied or nothing is trained.
        """
        samples = np.array(label)
        scores = np.zeros(samples.shape[0])
        for weight, learner in zip(self.alpha, self.classifier):
            scores = scores + weight * np.asarray(learner.prediction(samples)).reshape(-1)
        return [self.sign(score) for score in scores]

    def complute_error(self):
        """Store the ensemble's training error rate in ``count_error``."""
        predicted = np.array(self.prediction(self.feature))
        wrong = np.sum(predicted * self.label.reshape(-1) < 0)
        self.count_error = float(wrong) / (self.N * 1.0)

    def train(self, max_iter=1000):
        """Add weak learners until the training error reaches ``Epsilon``.

        Training also stops after ``max_iter`` rounds, or as soon as the
        best weak learner has a weighted error of 0.5 or more: its vote
        would be zero or negative and the weights would stop changing, so
        the loop could never finish.
        """
        for _ in range(max_iter):
            if self.count_error <= self.Epsilon:
                break
            classifier = weake_classifier(self.feature, self.label, self.W)
            classifier.train()
            error = float(classifier.get_information()[0])
            if error >= 0.5:
                break
            self.classifier.append(classifier)
            self.error = error
            self.__alpha__()
            self.update_W_()
            self.complute_error()
84+
85+
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from AdaBoost import *
2+
3+
4+
# Ten one-dimensional training samples with +1/-1 labels.
feature = np.array([
    [0],
    [1],
    [2],
    [3],
    [4],
    [5],
    [6],
    [7],
    [8],
    [9],
])

label = np.array([
    [1],
    [1],
    [1],
    [-1],
    [-1],
    [-1],
    [1],
    [1],
    [1],
    [-1],
])

# Samples to classify once the ensemble is trained.
test = np.array([
    [2],
    [2],
    [6],
    [4],
])

a = adaBoost(feature, label)
a.train()
# Single-argument call form prints the same text on Python 2 and Python 3.
print(a.prediction(test))
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""
2+
@ jetou
3+
@ weaker_classifier algorithm
4+
@ date 2017 11 19
5+
6+
"""
7+
import numpy as np
8+
class weake_classifier(object):
    """Weighted decision stump: one threshold on one feature column.

    Predictions are always +1 or -1, so labels are expected to be +1/-1.

    Parameters
    ----------
    feature : array-like of shape (samples, dimensions).
    label : array-like with one +1/-1 label per sample (flat or column).
    W : optional per-sample weights; uniform weights are used when omitted.
    """

    def __init__(self, feature, label, W=None):
        self.feature = np.array(feature)
        self.label = np.array(label)

        self.setlable = np.unique(label)
        self.feature_dem = self.feature.shape[1]
        self.N = self.feature.shape[0]

        # Identity test: ``W != None`` on an ndarray compares elementwise
        # and cannot be used as a condition.
        if W is not None:
            self.W = np.array(W)
        else:
            self.W = [1.0 / self.N for i in range(self.N)]

    def prediction(self, feature):
        """Return a (rows, 1) array of +1/-1 predictions for ``feature``.

        Requires ``train`` to have been called first.
        """
        test_feature = np.array(feature)
        output = np.ones((test_feature.shape[0], 1))
        column = test_feature[:, self.demention]
        output[column * self.finaly_label < self.threshold * self.finaly_label] = -1
        return output

    def __str__(self):
        string = "opt_threshold:" + str(self.threshold) + "\n"
        string += "opt_demention:" + str(self.demention) + "\n"
        string += "opt_errorRate:" + str(self.error) + "\n"
        string += "opt_label    :" + str(self.finaly_label) + "\n"
        string += "weights      :" + str(self.W) + "\n"

        return string

    def best_along_dem(self, demention, label):
        """Find the lowest-error threshold on one feature column.

        ``label`` is the direction of the stump: with +1, samples below the
        threshold are predicted -1; with -1, samples above it are.
        Candidates are N evenly spaced values starting at the column's
        minimum. Returns ``(threshold, weighted_error)``; the first
        candidate wins ties.
        """
        column = self.feature[:, demention]
        # Range of this column only, not of the whole feature matrix.
        low = np.min(column)
        high = np.max(column)
        step = (high - low) / (self.N * 1.0)
        # A constant column has a single meaningful candidate; np.arange
        # cannot be called with a zero step.
        candidates = np.arange(low, high, step) if step > 0 else [low]

        targets = self.label.reshape(-1)
        weights = np.asarray(self.W, dtype=float).reshape(-1)
        direction = np.asarray(label).reshape(-1)[0]

        threshold = candidates[0]
        min_error = float("inf")
        for candidate in candidates:
            output = np.ones(self.N)
            output[column * direction < candidate * direction] = -1
            # Weighted error: total weight of the misclassified samples.
            error_rate = weights[output * targets < 0].sum()
            if error_rate < min_error:
                min_error = error_rate
                threshold = candidate

        return threshold, min_error

    def train(self):
        """Pick the column, direction and threshold with the lowest error.

        Sets ``error``, ``finaly_label``, ``threshold`` and ``demention``.
        """
        flat_label = self.label.reshape(-1)
        # Each distinct label is tried once, in order of first appearance,
        # so ties are broken as if every label were visited in turn.
        _, first_seen = np.unique(flat_label, return_index=True)
        directions = flat_label[np.sort(first_seen)]

        self.error = float("inf")
        for demention in range(self.feature_dem):
            for direction in directions:
                threshold, err = self.best_along_dem(demention, direction)
                if self.error > err:
                    self.error = err
                    self.finaly_label = direction
                    self.threshold = threshold
                    self.demention = demention

    def get_information(self):
        """Return ``(error, W, demention)`` of the trained stump."""
        return self.error, self.W, self.demention
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from weaker_classifier import *
2+
3+
# Ten one-dimensional training samples with +1/-1 labels.
feature = np.array([
    [0],
    [1],
    [2],
    [3],
    [4],
    [5],
    [6],
    [7],
    [8],
    [9],
])

label = np.array([
    [1],
    [1],
    [1],
    [-1],
    [-1],
    [-1],
    [1],
    [1],
    [1],
    [-1],
])

test = np.array([
    [2],
])

# Non-uniform sample weights. The first entry used to read 0.007143, which
# made the weights sum to about 0.936; with 0.07143 they sum to 1.
d = np.array([
    [0.07143],
    [0.07143],
    [0.07143],
    [0.07143],
    [0.07143],
    [0.07143],
    [0.16667],
    [0.16667],
    [0.16667],
    [0.07143],
])

a = weake_classifier(feature, label)
b = weake_classifier(feature, label, d)
a.train()
# Single-argument call form prints the same text on Python 2 and Python 3.
print(str(a))
print(a.prediction(test))
b.train()
print(str(b))
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# -*- coding: utf-8 -*-
2+
3+
"""
4+
@ jetou
5+
@ Gaussian mixture model
6+
@ date 2017 11 27
7+
8+
"""
9+
# Reference http://blog.csdn.net/jinping_shi/article/details/59613054
10+
11+
12+
import numpy as np
13+
import math
14+
import copy
15+
16+
class EmGMM(object):
    """EM for a mixture of ``k`` axis-aligned Gaussians on synthetic data.

    ``train`` first samples ``N`` points from Gaussians centred on the rows
    of ``MU`` with the shared covariance ``sigma``, then runs EM starting
    from those same means.

    Parameters
    ----------
    sigma : square covariance matrix used to generate the data; its
        diagonal also seeds every component's variances. It is copied,
        never modified.
    k : number of Gaussian components.
    N : number of samples to generate.
    MU : one initial mean per component (``k`` rows).
    epsilon : convergence tolerance on the total parameter change.

    Attributes
    ----------
    MU : matrix whose row ``j`` is the mean of component ``j``.
    sigma : matrix whose row ``j`` holds the per-dimension variances of
        component ``j`` (the layout the M-step writes).
    alpha : list of mixing proportions.
    Expectations : (N, k) array of responsibilities from the last E-step.
    """

    # Lower bound on a variance so a collapsed component cannot cause a
    # division by zero or log(0) in the E-step.
    _VAR_FLOOR = 1e-12

    def __init__(self, sigma, k, N, MU, epsilon):
        self.k = k
        self.N = N
        self.epsilon = epsilon
        # Float copies: integer input would truncate every update, and the
        # caller's objects must not be changed by training.
        self.MU = np.matrix(MU, dtype=float)
        self._data_cov = np.array(sigma, dtype=float)
        self.sigma = np.matrix(np.tile(np.diag(self._data_cov), (k, 1)))
        self.alpha = [1.0 / k] * k

    def init_data(self):
        """Sample ``N`` points, each from a uniformly chosen component."""
        means = np.asarray(self.MU)
        dim = means.shape[1]
        # Kept for backward compatibility; the algorithm never reads it.
        self.Mu = np.random.random(self.k)
        self.Expectations = np.zeros((self.N, self.k))

        chosen = np.random.randint(self.k, size=self.N)
        noise = np.random.multivariate_normal(np.zeros(dim), self._data_cov, self.N)
        self.X = np.matrix(means[chosen] + noise)

    def e_step(self):
        """Compute each component's responsibility for each sample.

        Uses the diagonal-Gaussian density
        ``exp(-0.5 * sum((x - mu)^2 / var)) / sqrt(prod(var))``; the
        ``(2*pi)^(d/2)`` factor is the same for every component and cancels.
        Computed in log space so distant points do not underflow to 0/0.
        """
        X = np.asarray(self.X)
        means = np.asarray(self.MU)
        variances = np.maximum(np.asarray(self.sigma), self._VAR_FLOOR)
        with np.errstate(divide="ignore"):
            log_alpha = np.log(np.asarray(self.alpha, dtype=float))

        log_resp = np.empty((X.shape[0], self.k))
        for j in range(self.k):
            diff = X - means[j]
            distance = np.sum(diff * diff / variances[j], axis=1)
            log_norm = 0.5 * np.sum(np.log(variances[j]))
            log_resp[:, j] = log_alpha[j] - 0.5 * distance - log_norm

        # Subtracting the row maximum leaves the ratios unchanged.
        log_resp -= log_resp.max(axis=1)[:, np.newaxis]
        resp = np.exp(log_resp)
        self.Expectations = resp / resp.sum(axis=1)[:, np.newaxis]

    def m_step(self):
        """Re-estimate means, variances and mixing proportions."""
        X = np.asarray(self.X)
        for j in range(self.k):
            resp = self.Expectations[:, j]
            total = resp.sum()
            if total <= 0:
                # No sample is assigned here: keep the old mean and
                # variance instead of dividing by zero.
                self.alpha[j] = 0.0
                continue
            mean = resp.dot(X) / total
            self.MU[j, :] = mean
            self.alpha[j] = total / self.N
            self.sigma[j, :] = resp.dot(np.square(X - mean)) / total

    def train(self, inter=1000):
        """Generate data and iterate EM until the parameters settle.

        Stops after ``inter`` iterations, or once the summed absolute
        change of the means, of the variances and of the mixing
        proportions are each at most ``epsilon``. Progress is printed
        every iteration.
        """
        self.init_data()
        for i in range(inter):
            old_mu = np.array(self.MU, dtype=float)
            old_alpha = np.array(self.alpha, dtype=float)
            old_sigma = np.array(self.sigma, dtype=float)
            self.e_step()
            self.m_step()
            # %-formatting prints identically on Python 2 and Python 3.
            print("The number of iterations %s" % (i,))
            print("Location parameters: mu %s" % (self.MU,))
            print("variance: sigma %s" % (self.sigma,))
            print("Selected probability: alpha %s" % (self.alpha,))

            error = np.abs(old_mu - np.asarray(self.MU)).sum()
            err_sigma = np.abs(old_sigma - np.asarray(self.sigma)).sum()
            err_alpha = np.abs(old_alpha - np.asarray(self.alpha, dtype=float)).sum()
            if (error <= self.epsilon) and (err_sigma <= self.epsilon) and (err_alpha <= self.epsilon):
                print("%s %s %s" % (error, err_alpha, err_sigma))
                break
87+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from Gmm import *
2+
3+
# Float literals: integer-typed parameters would make every fractional
# estimate derived from them truncate to whole numbers.
sigma = np.matrix([[30.0, 0.0], [0.0, 30.0]])
MU = [[40.0, 20.0], [5.0, 35.0]]
a = EmGMM(sigma, 2, 1000, MU, 0.001)

a.train()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Machine learning algorithms from the book <统计学习方法> (Statistical Learning Methods) by Li Hang.

0 commit comments

Comments
 (0)