lzhanggit
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/AdaBoost.py
Lines changed: 85 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/AdaBoost.py
Lines changed: 85 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/AdaBoost_test.py
Lines changed: 39 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/AdaBoost_test.py
Lines changed: 39 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/weaker_classifier.py
Lines changed: 73 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/weaker_classifier.py
Lines changed: 73 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/weaker_test.py
Lines changed: 52 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/AdaBoost/weaker_test.py
Lines changed: 52 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/EM/Gmm.py
Lines changed: 87 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/EM/Gmm.py
Lines changed: 87 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/EM/gmm_test.py
Lines changed: 7 additions & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/EM/gmm_test.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎books/李航-统计学习/machine_learning_algorithm-master/README.md
Lines changed: 1 addition & 0 deletions b/‎books/李航-统计学习/machine_learning_algorithm-master/README.md
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,85 @@
+"""
+    @ jetou
+    @ AdaBoost algorithm
+    @ date 2017 11 19
+
+"""
+from weaker_classifier import *
+import math
+
+class adaBoost:
+    """AdaBoost ensemble built from ``weake_classifier`` decision stumps.
+
+    Each training round fits one stump on the current sample weights, gives
+    it a vote ``alpha`` computed from its weighted error, and re-weights the
+    samples so that the next stump concentrates on the mistakes.
+
+    Attributes:
+        feature     : training samples as an array, one row per sample
+        label       : training labels, one per sample (assumed to be +1 / -1)
+        Epsilon     : training stops once the training error rate of the
+                      ensemble is no greater than this value
+        N           : number of training samples
+        error       : weighted error of the most recently trained stump
+        count_error : fraction of training samples the ensemble gets wrong
+        alpha       : vote weights, parallel to ``classifier``
+        classifier  : trained stumps
+        W           : current sample weights
+    """
+
+    # The stump error is clamped to [_ERROR_FLOOR, 1 - _ERROR_FLOOR] before
+    # alpha is computed, so a perfect (or perfectly wrong) stump yields a
+    # large finite vote instead of a division by zero / log(0).
+    _ERROR_FLOOR = 1e-10
+
+    def __init__(self, feature, label, Epsilon = 0):
+        self.feature = np.array(feature)
+        self.label   = np.array(label)
+        self.Epsilon = Epsilon
+        self.N       = self.feature.shape[0]
+        self.error   = 1
+        self.count_error = 1
+
+        self.alpha = []
+        self.classifier = []
+        self.W = [1.0 / self.N for i in range(self.N)]
+
+    def sign(self, value):
+        """Return 1, -1 or 0 according to the sign of ``value``."""
+        if value > 0:
+            return 1
+        if value < 0:
+            return -1
+        return 0
+
+    def update_W_(self):
+        """Re-weight the samples using the most recently added stump.
+
+        Applies ``w_i <- w_i * exp(-alpha * y_i * h(x_i)) / Z`` where ``h`` is
+        the last stump, ``alpha`` its vote and ``Z`` the sum of the
+        un-normalised weights.
+        """
+        alpha = self.alpha[-1]
+        # One batched prediction instead of 2 * N single-sample calls.
+        votes = np.ravel(self.classifier[-1].prediction(self.feature))
+        targets = np.ravel(self.label)
+
+        scaled = [
+            self.W[i] * math.exp(-1 * alpha * float(targets[i] * votes[i]))
+            for i in range(self.N)
+        ]
+        z = sum(scaled)
+        self.W = [w / z for w in scaled]
+
+    def __alpha__(self):
+        """Append the vote ``0.5 * ln((1 - e) / e)`` of the latest stump.
+
+        ``e`` is ``self.error`` clamped away from 0 and 1 (see _ERROR_FLOOR).
+        """
+        err = float(np.ravel(self.error)[0])
+        err = min(max(err, self._ERROR_FLOOR), 1.0 - self._ERROR_FLOOR)
+        self.alpha.append(math.log((1 - err) / err) / 2)
+
+    def prediction(self, label):
+        """Classify samples with the weighted vote of all trained stumps.
+
+        label : despite its name, the *feature* rows to classify (one row
+                per sample); the parameter name is kept for compatibility.
+
+        Returns a list with one entry per sample: 1, -1, or 0 when the
+        weighted vote is exactly zero (including when nothing is trained).
+        """
+        samples = np.array(label)
+        if samples.shape[0] == 0:
+            return []
+
+        scores = np.zeros(samples.shape[0])
+        for vote, stump in zip(self.alpha, self.classifier):
+            scores = scores + vote * np.ravel(stump.prediction(samples))
+
+        return [self.sign(score) for score in scores]
+
+    def complute_error(self):
+        """Store the ensemble's training error rate in ``self.count_error``."""
+        result = self.prediction(self.feature)
+        targets = np.ravel(self.label)
+        # A zero score is an undecided sample, so it counts as an error too.
+        count_error = sum(
+            1 for i in range(self.N) if result[i] * targets[i] <= 0
+        )
+        self.count_error = count_error / (self.N * 1.0)  # error rate
+
+    def train(self, max_iter=None):
+        """Add stumps until the training error rate drops to ``Epsilon``.
+
+        max_iter : optional upper bound on the number of boosting rounds;
+                   ``None`` (the default) means no bound.
+
+        Training also stops when a stump gets a vote of exactly zero (its
+        weighted error is 0.5): such a stump leaves the sample weights
+        unchanged, so every later round would only repeat it.
+        """
+        rounds = 0
+        while self.count_error > self.Epsilon:
+            if max_iter is not None and rounds >= max_iter:
+                break
+            classifier = weake_classifier(self.feature, self.label, self.W)
+            self.classifier.append(classifier)
+            classifier.train()
+            self.error, self.W, _ = classifier.get_information()
+            self.__alpha__()
+            if self.alpha[-1] == 0:
+                # Chance-level stump: drop it and stop instead of looping.
+                self.classifier.pop()
+                self.alpha.pop()
+                break
+            self.update_W_()
+            self.complute_error()
+            rounds += 1
+
+
+
+
@@ -0,0 +1,39 @@
+from AdaBoost import *
+
+
+# Ten one-dimensional training samples: x = 0, 1, ..., 9 (one row each).
+feature = np.arange(10).reshape(-1, 1)
+
+# Matching +1 / -1 labels, stored as a column like the samples.
+label = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1]).reshape(-1, 1)
+
+# Samples to classify once the ensemble is trained.
+test = np.array([2, 2, 6, 4]).reshape(-1, 1)
+
+# Train with the default Epsilon and print the predicted labels.
+a = adaBoost(feature, label)
+a.train()
+print a.prediction(test)
@@ -0,0 +1,73 @@
+"""
+    @ jetou
+    @ weaker_classifier algorithm
+    @ date 2017 11 19
+
+"""
+import numpy as np
+class weake_classifier:
+    """Weighted decision stump: one feature, one threshold, one polarity.
+
+    After ``train`` the stump predicts -1 for every sample whose chosen
+    feature satisfies ``x * finaly_label < threshold * finaly_label`` and +1
+    otherwise.
+
+    Attributes set by ``__init__``:
+        feature     : training samples, one row per sample
+        label       : training labels, one per sample
+        setlable    : sorted distinct label values
+        feature_dem : number of feature columns
+        N           : number of samples
+        W           : sample weights (uniform when none are given)
+    Attributes set by ``train``:
+        error, finaly_label, threshold, demention
+    """
+
+    def __init__(self, feature, label, W = None):
+        self.feature = np.array(feature)
+        self.label   = np.array(label)
+
+        self.setlable = np.unique(label)
+        self.feature_dem = self.feature.shape[1]
+        self.N = self.feature.shape[0]
+
+        # "is not None": comparing an ndarray with != is element-wise and
+        # cannot be used as a condition.
+        if W is not None:
+            self.W = np.array(W)
+        else:
+            self.W = [1.0 / self.N for i in range(self.N)]
+
+    def prediction(self, feature):
+        """Return an (M, 1) array of +1 / -1 predictions for M sample rows."""
+        test_feature = np.array(feature)
+        output = np.ones((test_feature.shape[0], 1))
+        below = (test_feature[:, self.demention] * self.finaly_label
+                 < self.threshold * self.finaly_label)
+        output[below] = -1
+
+        return output
+
+    def __str__(self):
+        string  = "opt_threshold:" + str(self.threshold)    + "\n"
+        string += "opt_demention:" + str(self.demention)    + "\n"
+        string += "opt_errorRate:" + str(self.error)        + "\n"
+        string += "opt_label    :" + str(self.finaly_label) + "\n"
+        string += "weights      :" + str(self.W)            + "\n"
+
+        return string
+
+    def best_along_dem(self, demention, label):
+        """Find the best threshold on one feature column for one polarity.
+
+        demention : index of the feature column to split on
+        label     : polarity; samples with ``x * label < t * label`` are
+                    predicted -1, all others +1
+
+        Candidate thresholds are N evenly spaced values starting at the
+        column minimum and stopping short of the column maximum.
+
+        Returns ``(threshold, min_error)`` where ``min_error`` is the lowest
+        weighted error found and ``threshold`` the first candidate reaching
+        it.
+        """
+        column = self.feature[:, demention]
+        # Search the range of this column only, not of the whole matrix.
+        feature_max = np.max(column)
+        feature_min = np.min(column)
+
+        if feature_max == feature_min:
+            # Constant column: a zero step would break np.arange, and a
+            # single candidate is all there is to try.
+            candidates = [feature_min]
+        else:
+            step = (feature_max - feature_min) / (self.N * 1.0)
+            candidates = np.arange(feature_min, feature_max, step)
+
+        min_error = float("inf")
+        threshold = feature_min
+
+        for i in candidates:
+            output = np.ones((self.N, 1))
+            output[column * label < i * label] = -1
+
+            errorRate = 0.0
+            for j in range(self.N):
+                if output[j] * self.label[j] < 0:
+                    errorRate += self.W[j]
+
+            if errorRate < min_error:
+                min_error = errorRate
+                threshold = i
+
+        return  threshold, min_error
+
+    def train(self):
+        """Pick the feature, polarity and threshold with lowest weighted error."""
+        # Each distinct label is tried once, in order of first appearance.
+        # Re-trying a duplicate gives the same error and can never win the
+        # strict comparison below, so skipping duplicates changes nothing
+        # except the amount of work.
+        polarities = []
+        seen = set()
+        for label in self.label:
+            key = tuple(np.ravel(label).tolist())
+            if key not in seen:
+                seen.add(key)
+                polarities.append(label)
+
+        self.error = float("inf")
+        for demention in range(self.feature_dem):
+            for label in polarities:
+                threshold, err = self.best_along_dem(demention, label)
+                if self.error > err:
+                    self.error = err
+                    self.finaly_label = label
+                    self.threshold = threshold
+                    self.demention = demention
+
+    def get_information(self):
+        """Return ``(error, W, demention)`` of the trained stump."""
+        return self.error, self.W, self.demention
@@ -0,0 +1,52 @@
+from weaker_classifier import *
+
+# Ten one-dimensional training samples (x = 0..9), one row each.
+feature = np.array([
+    [0],
+    [1],
+    [2],
+    [3],
+    [4],
+    [5],
+    [6],
+    [7],
+    [8],
+    [9],
+])
+
+# Matching +1 / -1 labels.
+label = np.array([
+    [1],
+    [1],
+    [1],
+    [-1],
+    [-1],
+    [-1],
+    [1],
+    [1],
+    [1],
+    [-1],
+])
+
+test = np.array([
+    [2],
+])
+
+# Non-uniform sample weights for the second stump. They sum to 1
+# (7 * 0.07143 + 3 * 0.16667); the first entry used to read 0.007143,
+# which broke that normalisation.
+d=np.array([
+    [0.07143],
+    [0.07143],
+    [0.07143],
+    [0.07143],
+    [0.07143],
+    [0.07143],
+    [0.16667],
+    [0.16667],
+    [0.16667],
+    [0.07143],
+])
+
+# a: stump trained with uniform weights; b: stump trained with weights d.
+a = weake_classifier(feature, label)
+b = weake_classifier(feature, label,d)
+a.train()
+print(a)
+print(a.prediction(test))
+b.train()
+print(b)
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+
+"""
+    @ jetou
+    @ Gaussian mixture model
+    @ date 2017 11 27
+
+"""
+# Reference http://blog.csdn.net/jinping_shi/article/details/59613054
+
+
+import numpy as np
+import math
+import copy
+
+class EmGMM:
+    """EM for a Gaussian mixture whose components share one covariance.
+
+    ``train`` first draws synthetic data from the initial parameters and then
+    alternates E and M steps until means, covariance and mixing weights all
+    move by no more than ``epsilon`` (or the iteration budget is spent).
+
+    Attributes:
+        k, N, epsilon : as passed to ``__init__``
+        MU            : (k, d) float matrix of component means
+        sigma         : (d, d) float covariance matrix shared by components
+        alpha         : list of k mixing weights
+        X             : (N, d) matrix of generated samples (after init_data)
+        Expectations  : (N, k) array of responsibilities (after e_step)
+    """
+
+    # Small ridge added to the covariance estimate so it stays invertible.
+    _RIDGE = 1e-6
+
+    def __init__(self, sigma, k, N, MU, epsilon):
+        """
+        sigma   : (d, d) covariance matrix, used to draw the data and as the
+                  initial covariance estimate
+        k       : number of Gaussian components
+        N       : number of samples to generate
+        MU      : k rows of d-dimensional means, used to draw the data and
+                  as the initial mean estimates
+        epsilon : convergence threshold on the summed absolute change of
+                  each parameter group
+        """
+        self.k = k
+        self.N = N
+        self.epsilon = epsilon
+        # Float copies: integer inputs would truncate every update, and the
+        # caller's objects must not be modified by training.
+        self.sigma = np.matrix(sigma, dtype=float)
+        self.MU = np.matrix(MU, dtype=float)
+        self.alpha = [1.0 / k] * k
+
+    def init_data(self):
+        """Draw N samples, each from a uniformly chosen component."""
+        dim = self.MU.shape[1]
+        means = self.MU.tolist()
+        self.X = np.matrix(np.zeros((self.N, dim)))
+        # Not used by the EM steps; kept because the attribute always existed.
+        self.Mu = np.random.random(self.k)
+        self.Expectations = np.zeros((self.N, self.k))
+        for i in range(self.N):
+            component = np.random.randint(self.k)
+            self.X[i, :] = np.random.multivariate_normal(means[component], self.sigma, 1)
+
+    def e_step(self):
+        """Compute the responsibility of every component for every sample.
+
+        The density is ``alpha_j * exp(-0.5 * (x - mu_j) S^-1 (x - mu_j)^T)``;
+        the normalising constant is identical for all components (shared
+        covariance) and cancels in the ratio, so it is not evaluated.
+        """
+        inverse = self.sigma.I
+        quad = np.zeros((self.N, self.k))
+        for j in range(self.k):
+            diff = self.X - self.MU[j, :]
+            quad[:, j] = np.asarray(np.multiply(diff * inverse, diff).sum(axis=1)).ravel()
+
+        # Subtract the per-sample minimum before exponentiating so that the
+        # closest component never underflows to zero.
+        shifted = quad - quad.min(axis=1, keepdims=True)
+        weights = np.asarray(self.alpha) * np.exp(-0.5 * shifted)
+        denom = weights.sum(axis=1, keepdims=True)
+
+        # denom is zero only if the closest component has zero mixing weight
+        # and all others underflow; fall back to equal responsibilities.
+        usable = denom > 0
+        self.Expectations = np.where(
+            usable, weights / np.where(usable, denom, 1.0), 1.0 / self.k)
+
+    def m_step(self):
+        """Re-estimate means, mixing weights and the shared covariance."""
+        totals = self.Expectations.sum(axis=0)
+        for j in range(self.k):
+            if totals[j] <= 0:
+                # Empty component: nothing to average, keep its mean.
+                self.alpha[j] = 0.0
+                continue
+            self.MU[j, :] = (np.matrix(self.Expectations[:, j]) * self.X) / totals[j]
+            self.alpha[j] = float(totals[j] / self.N)
+
+        # Shared covariance: responsibility-weighted scatter around each
+        # component mean, pooled over components (responsibilities sum to N).
+        dim = self.MU.shape[1]
+        scatter = np.matrix(np.zeros((dim, dim)))
+        for j in range(self.k):
+            diff = self.X - self.MU[j, :]
+            weighted = np.multiply(diff, self.Expectations[:, j].reshape(-1, 1))
+            scatter += weighted.T * diff
+        self.sigma = scatter / float(self.N) + self._RIDGE * np.eye(dim)
+
+    def train(self, inter=1000):
+        """Generate data, then run at most ``inter`` EM iterations.
+
+        Progress is printed every iteration. The loop ends early once the
+        summed absolute change of the means, of the covariance and of the
+        mixing weights are each no greater than ``epsilon``.
+        """
+        self.init_data()
+        for i in range(inter):
+            old_mu = self.MU.copy()
+            old_alpha = list(self.alpha)
+            old_sigma = self.sigma.copy()
+            self.e_step()
+            self.m_step()
+            print("The number of iterations %s" % (i,))
+            print("Location parameters: mu %s" % (self.MU,))
+            print("variance: sigma %s" % (self.sigma,))
+            print("Selected probability: alpha %s" % (self.alpha,))
+            error = float(np.abs(old_mu - self.MU).sum())
+            err_sigma = float(np.abs(old_sigma - self.sigma).sum())
+            err_alpha = sum(abs(old - new) for old, new in zip(old_alpha, self.alpha))
+            if (error <= self.epsilon) and (err_sigma <= self.epsilon) and (err_alpha <= self.epsilon):
+                print("%s %s %s" % (error, err_alpha, err_sigma))
+                break
+
+
@@ -0,0 +1,7 @@
+from Gmm import *
+
+# Float literals, so the model's parameter matrices are floating-point:
+# integer matrices cannot hold the fractional estimates EM produces.
+sigma = np.matrix([[30.0, 0.0], [0.0, 30.0]])
+MU = [[40.0, 20.0], [5.0, 35.0]]
+
+# Two components, 1000 generated samples, convergence threshold 0.001.
+a = EmGMM(sigma, 2, 1000, MU, 0.001)
+
+a.train()
@@ -0,0 +1 @@
+Machine learning algorithms from the book <统计学习方法>
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Machine learning algorithms from the book <统计学习方法>`