From d0503cbe6a8f1ff5af9db410cd853ef916966edb Mon Sep 17 00:00:00 2001
From: justdark
Date: Sun, 13 Nov 2016 15:37:09 +0800
Subject: [PATCH 1/4] Update pca.py

bug
---
 dml/tool/pca.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/dml/tool/pca.py b/dml/tool/pca.py
index a23b7ba..b0e34c6 100644
--- a/dml/tool/pca.py
+++ b/dml/tool/pca.py
@@ -10,12 +10,13 @@ def pca(X,axis=0):
         Returns the eigenvectors U, the eigenvalues (on diagonal) in S
     '''
     X_s=np.array(X)
+    X_s = (X_s.transpose()-np.mean(X, axis=1)).transpose()
     if axis==0:
         N,M=X_s.shape
-        Sigma=np.dot(X_s,X_s.transpose())/M
+        Sigma=np.dot(X_s,X_s.transpose())/(M-1)
     else:
         M,N=X_s.shape
-        Sigma=np.dot(X_s.transpose(),X_s)/M
+        Sigma=np.dot(X_s.transpose(),X_s)/(M-1)
     U,S,V = sp.linalg.svd(Sigma);
     return U,S
@@ -25,4 +26,4 @@ def projectData(X,U,K):
     return np.dot(U[:,:K].transpose(),X)
 
 def recoverData(Z,U,K):
-    return np.dot(U[:,:K],Z)
\ No newline at end of file
+    return np.dot(U[:,:K],Z)
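
This patch fixes two statistical issues in pca(): the data is now centered before the scatter matrix is formed (PCA assumes zero-mean variables), and the divisor changes from M to M-1, the unbiased sample-covariance convention. Note that the added centering line always subtracts the mean over axis=1, which matches the axis=0 layout (features in rows, M observations in columns). A minimal standalone sketch of what the patched axis=0 path computes; pca_sketch is an illustrative name, not part of dml:

    import numpy as np

    def pca_sketch(X):
        # rows are features, columns are the M observations
        X = np.asarray(X, dtype=float)
        M = X.shape[1]
        Xc = X - X.mean(axis=1, keepdims=True)    # center each feature
        Sigma = np.dot(Xc, Xc.T) / (M - 1)        # unbiased sample covariance
        U, S, Vt = np.linalg.svd(Sigma)           # Sigma is symmetric PSD, so U holds its eigenvectors
        return U, S

    X = np.random.randn(3, 100)
    U, S = pca_sketch(X)
    # np.cov uses the same 1/(M-1) convention, so Sigma round-trips:
    assert np.allclose(np.cov(X), np.dot(U * S, U.T))
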
From 10a1d9f741fe3b2b9bd1ced51eed9a2124f9adf3 Mon Sep 17 00:00:00 2001
From: admin
Date: Wed, 14 Dec 2016 10:33:34 +0800
Subject: [PATCH 2/4] adaboost.py float

---
 build/lib/dml/ADAB/adaBoost.py | 16 +++++++++++-----
 dml/ADAB/adaBoost.py           |  2 +-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/build/lib/dml/ADAB/adaBoost.py b/build/lib/dml/ADAB/adaBoost.py
index 618c58b..bbeba7b 100644
--- a/build/lib/dml/ADAB/adaBoost.py
+++ b/build/lib/dml/ADAB/adaBoost.py
@@ -7,13 +7,15 @@ class ADABC:
     def __init__(self,X,y,Weaker=WEAKC):
         '''
+            X is a N*M matrix
             Weaker is a class of weak classifier
             It should have a train(self.W) method pass the weight parameter to train
                              pred(test_set) method which return y formed by 1 or -1
             see detail in <统计学习方法>
         '''
         self.X=np.array(X)
-        self.y=np.array(y)
+        self.y=np.array(y).flatten(1)
+        assert self.X.shape[1]==self.y.size
         self.Weaker=Weaker
         self.sums=np.zeros(self.y.shape)
         self.W=np.ones((self.X.shape[1],1)).flatten(1)/self.X.shape[1]
@@ -32,7 +34,7 @@ def train(self,M=4):
             self.G[i]=self.Weaker(self.X,self.y)
             e=self.G[i].train(self.W)
             #print self.G[i].t_val,self.G[i].t_b,e
-            self.alpha[i]=1/2*np.log((1-e)/e)
+            self.alpha[i]=float(1.0 / 2 * np.log((1 - e) / e))
             #print self.alpha[i]
             sg=self.G[i].pred(self.X)
             Z=self.W*np.exp(-self.alpha[i]*self.y*sg.transpose())
@@ -57,10 +59,14 @@ def finalclassifer(self,t):
         #pre_y=sign(sums)
         t=(pre_y!=self.y).sum()
         return t
+
     def pred(self,test_set):
-        sums=np.zeros(self.y.shape)
+        test_set=np.array(test_set)
+        assert test_set.shape[0]==self.X.shape[0]
+        sums=np.zeros((test_set.shape[1],1)).flatten(1)
+
         for i in range(self.Q+1):
-            sums=sums+self.G[i].pred(self.X).flatten(1)*self.alpha[i]
+            sums=sums+self.G[i].pred(test_set).flatten(1)*self.alpha[i]
         #print sums
         pre_y=sign(sums)
-        return pre_y
+        return pre_y
\ No newline at end of file
diff --git a/dml/ADAB/adaBoost.py b/dml/ADAB/adaBoost.py
index 74a59ad..bbeba7b 100644
--- a/dml/ADAB/adaBoost.py
+++ b/dml/ADAB/adaBoost.py
@@ -34,7 +34,7 @@ def train(self,M=4):
             self.G[i]=self.Weaker(self.X,self.y)
             e=self.G[i].train(self.W)
             #print self.G[i].t_val,self.G[i].t_b,e
-            self.alpha[i]=1/2*np.log((1-e)/e)
+            self.alpha[i]=float(1.0 / 2 * np.log((1 - e) / e))
             #print self.alpha[i]
             sg=self.G[i].pred(self.X)
             Z=self.W*np.exp(-self.alpha[i]*self.y*sg.transpose())
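
The key change here is a Python 2 integer-division fix: under Python 2, 1/2 evaluates to 0, so every round's classifier weight alpha_m = 1/2 * ln((1 - e_m)/e_m) silently came out zero and the ensemble never learned; 1.0 / 2 forces float division, and the float(...) wrapper coerces the numpy result to a plain scalar. The other hunks let pred() score an arbitrary test_set (previously it always re-scored the training data self.X) and add shape asserts. A standalone sketch of the failure mode, for illustration only (// reproduces Python 2's division semantics under Python 3):

    import numpy as np

    e = 0.3                                    # weighted error of one weak classifier
    alpha_bug = 1 // 2 * np.log((1 - e) / e)   # 0 * 0.847... == 0.0: the old behaviour
    alpha_fix = 1.0 / 2 * np.log((1 - e) / e)  # ~0.4236, the intended AdaBoost weight
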
From 032823bfaf2a24f43201584f21b4c4f361de806f Mon Sep 17 00:00:00 2001
From: DarkScope
Date: Sat, 30 Dec 2017 14:26:28 +0800
Subject: [PATCH 3/4] Create mcmc.py

mcmc
---
 dml/tool/mcmc.py | 111 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 dml/tool/mcmc.py

diff --git a/dml/tool/mcmc.py b/dml/tool/mcmc.py
new file mode 100644
index 0000000..29c9fb2
--- /dev/null
+++ b/dml/tool/mcmc.py
@@ -0,0 +1,111 @@
+from matplotlib import pyplot as plt
+import numpy as np
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cm
+import math
+PI = 3.14159265358979323846
+def domain_random():
+    return np.random.random()*3.8-1.9
+def get_p(x):
+    # return 1/(2*PI)*np.exp(- x[0]**2 - x[1]**2)
+    return 1/(2*PI*math.sqrt(1-0.25))*np.exp(-1/(2*(1-0.25))*(x[0]**2 -x[0]*x[1] + x[1]**2))
+
+def get_tilde_p(x):
+    return get_p(x)*20
+
+def partialSampler(x,dim):
+    xes = []
+    for t in range(10):
+        xes.append(domain_random())
+    tilde_ps = []
+    for t in range(10):
+        tmpx = x[:]
+        tmpx[dim] = xes[t]
+        tilde_ps.append(get_tilde_p(tmpx))
+
+    norm_tilde_ps = np.asarray(tilde_ps)/sum(tilde_ps)
+    u = np.random.random()
+    sums = 0.0
+    for t in range(10):
+        sums += norm_tilde_ps[t]
+        if sums>=u:
+            return xes[t]
+
+
+def plotContour(plot = False):
+    X = np.arange(-2, 2, 0.05)
+    Y = np.arange(-2, 2, 0.05)
+    X, Y = np.meshgrid(X, Y)
+    Z = get_p([X,Y])
+    plt.contourf(X, Y, Z, 100, alpha = 1.0, cmap =cm.coolwarm)
+
+    # plt.contour(X, Y, Z, 7, colors = 'black', linewidth = 0.01)
+    if plot:
+        plt.show()
+
+def plot3D():
+    X = np.arange(-2, 2, 0.05)
+    Y = np.arange(-2, 2, 0.05)
+    X, Y = np.meshgrid(X, Y)
+    Z = get_p([X,Y])
+    fig = plt.figure()
+    ax = Axes3D(fig)
+    ax.grid(False)
+    ax.w_yaxis.set_pane_color((1,1,1,0))
+    ax.w_xaxis.set_pane_color((1,1,1,1))
+    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm)
+    plt.show()
+
+# plotContour()
+# plot3D()
+
+def metropolis(x):
+    new_x = (domain_random(),domain_random())
+    acc = min(1,get_tilde_p((new_x[0],new_x[1]))/get_tilde_p((x[0],x[1])))
+    u = np.random.random()
+    if u<acc:
+        return new_x
+    return None

[... the remaining ~40 of mcmc.py's 111 lines are missing from this copy; the two lines after "if u<acc" above are reconstructed from context ...]
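
For context: get_p is the density of a zero-mean bivariate Gaussian with unit variances and correlation 0.5 (hence the 1 - 0.25 factors), get_tilde_p scales it by 20 to play the role of an unnormalized target, metropolis() performs one Metropolis step with an independent uniform proposal on [-1.9, 1.9]^2 (returning the proposal on acceptance, None on rejection), and partialSampler() approximates a Gibbs conditional by normalizing the target over ten random candidates for one coordinate. Since the file's driver code is cut off above, here is a hypothetical usage loop; run_chain is not from the file and assumes mcmc.py's definitions are in scope:

    import numpy as np

    def run_chain(steps=1000):
        x = (domain_random(), domain_random())   # random start inside the square
        samples = [x]
        for _ in range(steps):
            new_x = metropolis(x)                # None signals a rejected proposal
            if new_x is not None:
                x = new_x
            samples.append(x)                    # a rejection repeats the current state
        return np.array(samples)

    # The empirical covariance of run_chain(5000) should approach
    # [[1, 0.5], [0.5, 1]], up to the truncation of the sampling domain.
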
[... the "From" commit-header lines of the next patch are also missing from this copy ...]
Date: Tue, 15 Jun 2021 22:45:10 +1000
Subject: [PATCH 4/4] docs: fix a few simple typos

There are small typos in:
- README.md
- dml/CNN/CNN.py
- dml/DT/decisionTree.py
- dml/LR/logisticRegression.py
- dml/NB/naiveBayesian.py
- dml/NN/neuralNetwork.py
- test/naiveBayesian/nb_test.py

Fixes:
- Should read `continuous` rather than `continous`.
- Should read `discreet` rather than `descret`.
- Should read `actually` rather than `actualy`.
- Should read `referred` rather than `refered`.
- Should read `recommend` rather than `recommand`.

Closes #2
---
 README.md                     | 4 ++--
 dml/CNN/CNN.py                | 2 +-
 dml/DT/decisionTree.py        | 2 +-
 dml/LR/logisticRegression.py  | 2 +-
 dml/NB/naiveBayesian.py       | 6 +++---
 dml/NN/neuralNetwork.py       | 2 +-
 test/naiveBayesian/nb_test.py | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index bcf9282..0355125 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Code Files
 
 `./dml/NN` -the code of **Neural NetWorks**
 
-`./dml/LR` -**Logistic Regression**,actualy It's **softmax**
+`./dml/LR` -**Logistic Regression**,actually It's **softmax**
 
 `./dml/DT` -**Decision Tree** , CART algorithm
 
@@ -21,7 +21,7 @@ Code Files
 
 `./dml/KNN` -the **k-Nearest Neighbor** algorithm(kd-tree BBF implementing)
 
-`./dml/NB` -the **naive Bayesian** support both continous and descrete features
+`./dml/NB` -the **naive Bayesian** support both continuous and descrete features
 
 `./dml/SVM` -the basic binary **Support Vector Machine**
diff --git a/dml/CNN/CNN.py b/dml/CNN/CNN.py
index f2788a0..ad54858 100644
--- a/dml/CNN/CNN.py
+++ b/dml/CNN/CNN.py
@@ -5,7 +5,7 @@
 from dml.tool import sigmoid,expand,showimage
 from numpy import rot90
 '''
-    this algorithm have refered to the DeepLearnToolBox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
+    this algorithm have referred to the DeepLearnToolBox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
     also:[1]:"Notes on Convolutional Neural Networks" Jake Bouvrie 2006 - How to implement CNNs
     I want to implement as [1] described,where the subsampling layer have sigmoid function
     but finally it does not converge,but I can pass the gradcheck!!
diff --git a/dml/DT/decisionTree.py b/dml/DT/decisionTree.py
index 7765e0b..cddae99 100644
--- a/dml/DT/decisionTree.py
+++ b/dml/DT/decisionTree.py
@@ -29,7 +29,7 @@ def __init__(self,X,y,property=None):
             so If your X have some string parameter,all thing will translate to string
             in this situation,you can't have continuous parameter
             so remember:
-                if you have continous parameter,DON'T PUT any STRING IN X !!!!!!!!
+                if you have continuous parameter,DON'T PUT any STRING IN X !!!!!!!!
         '''
         self.X=np.array(X).transpose()
         self.y=np.array(y).flatten(1)
diff --git a/dml/LR/logisticRegression.py b/dml/LR/logisticRegression.py
index 4c00236..e2de04c 100644
--- a/dml/LR/logisticRegression.py
+++ b/dml/LR/logisticRegression.py
@@ -12,7 +12,7 @@ class LRC:
         X   -is a N*M matrix
         y   -is a M vector
         lam -is the parameter lambda for LR penalty
-        actualy it's a softmax......=.=
+        actually it's a softmax......=.=
         but they are same when the class_number is 2
         also see ufldl:
         http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression
diff --git a/dml/NB/naiveBayesian.py b/dml/NB/naiveBayesian.py
index 3af8cde..0325334 100644
--- a/dml/NB/naiveBayesian.py
+++ b/dml/NB/naiveBayesian.py
@@ -9,10 +9,10 @@ class NBC:
     def __init__(self,X,y,Indicator=None):
         '''
             X a N*M matrix where M is the train case number
-                should be number both continous and descret feature
+                should be number both continuous and discreet feature
             y class label for classification
-            Indicator show whether the feature is continous(0) or descret(1)
-                continous in default
+            Indicator show whether the feature is continuous(0) or discreet(1)
+                continuous in default
         '''
         self.X=np.array(X)
diff --git a/dml/NN/neuralNetwork.py b/dml/NN/neuralNetwork.py
index 78763d4..b4fef25 100644
--- a/dml/NN/neuralNetwork.py
+++ b/dml/NN/neuralNetwork.py
@@ -3,7 +3,7 @@
     DeepLearnToolbox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
     I think the whole architecture of it is clear and easy to understand
     so I copy it to python
-    I also recommand UFLDL(http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial)
+    I also recommend UFLDL(http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial)
     to learn Neural Network
 
     TODO:SAE,DAE and so on
diff --git a/test/naiveBayesian/nb_test.py b/test/naiveBayesian/nb_test.py
index 001a45a..50e9763 100644
--- a/test/naiveBayesian/nb_test.py
+++ b/test/naiveBayesian/nb_test.py
@@ -5,7 +5,7 @@
 import scipy as sp
 from dml.NB import NBC
 '''
-    Example of descret data from <统计学习方法>
+    Example of discreet data from <统计学习方法>
     S=0 M=1 L=2