From d0503cbe6a8f1ff5af9db410cd853ef916966edb Mon Sep 17 00:00:00 2001
From: justdark
Date: Sun, 13 Nov 2016 15:37:09 +0800
Subject: [PATCH 1/4] Update pca.py

bug
---
 dml/tool/pca.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/dml/tool/pca.py b/dml/tool/pca.py
index a23b7ba..b0e34c6 100644
--- a/dml/tool/pca.py
+++ b/dml/tool/pca.py
@@ -10,12 +10,13 @@ def pca(X,axis=0):
         Returns the eigenvectors U, the eigenvalues (on diagonal) in S
     '''
     X_s=np.array(X)
+    X_s = (X_s.transpose()-np.mean(X, axis=1)).transpose()
     if axis==0:
         N,M=X_s.shape
-        Sigma=np.dot(X_s,X_s.transpose())/M
+        Sigma=np.dot(X_s,X_s.transpose())/(M-1)
     else:
         M,N=X_s.shape
-        Sigma=np.dot(X_s.transpose(),X_s)/M
+        Sigma=np.dot(X_s.transpose(),X_s)/(M-1)
     U,S,V = sp.linalg.svd(Sigma);
     return U,S
@@ -25,4 +26,4 @@ def projectData(X,U,K):
     return np.dot(U[:,:K].transpose(),X)
 
 def recoverData(Z,U,K):
-    return np.dot(U[:,:K],Z)
\ No newline at end of file
+    return np.dot(U[:,:K],Z)
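
This patch fixes two statistical issues in pca(): the data is now centered before the scatter matrix is formed (PCA assumes zero-mean variables), and the divisor changes from M to M-1, the unbiased sample-covariance convention. Note that the added centering line always subtracts the mean over axis=1, which matches the axis=0 layout (features in rows, M observations in columns). A minimal standalone sketch of what the patched axis=0 path computes; pca_sketch is an illustrative name, not part of dml:

    import numpy as np

    def pca_sketch(X):
        # rows are features, columns are the M observations
        X = np.asarray(X, dtype=float)
        M = X.shape[1]
        Xc = X - X.mean(axis=1, keepdims=True)    # center each feature
        Sigma = np.dot(Xc, Xc.T) / (M - 1)        # unbiased sample covariance
        U, S, Vt = np.linalg.svd(Sigma)           # Sigma is symmetric PSD, so U holds its eigenvectors
        return U, S

    X = np.random.randn(3, 100)
    U, S = pca_sketch(X)
    # np.cov uses the same 1/(M-1) convention, so Sigma round-trips:
    assert np.allclose(np.cov(X), np.dot(U * S, U.T))
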
From 10a1d9f741fe3b2b9bd1ced51eed9a2124f9adf3 Mon Sep 17 00:00:00 2001
From: admin
Date: Wed, 14 Dec 2016 10:33:34 +0800
Subject: [PATCH 2/4] adaboost.py float

---
 build/lib/dml/ADAB/adaBoost.py | 16 +++++++++++-----
 dml/ADAB/adaBoost.py           |  2 +-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/build/lib/dml/ADAB/adaBoost.py b/build/lib/dml/ADAB/adaBoost.py
index 618c58b..bbeba7b 100644
--- a/build/lib/dml/ADAB/adaBoost.py
+++ b/build/lib/dml/ADAB/adaBoost.py
@@ -7,13 +7,15 @@ class ADABC:
     def __init__(self,X,y,Weaker=WEAKC):
         '''
+            X is a N*M matrix
             Weaker is a class of weak classifier
             It should have a train(self.W) method pass the weight parameter to train
                              pred(test_set) method which return y formed by 1 or -1
             see detail in <统计学习方法>
         '''
         self.X=np.array(X)
-        self.y=np.array(y)
+        self.y=np.array(y).flatten(1)
+        assert self.X.shape[1]==self.y.size
         self.Weaker=Weaker
         self.sums=np.zeros(self.y.shape)
         self.W=np.ones((self.X.shape[1],1)).flatten(1)/self.X.shape[1]
@@ -32,7 +34,7 @@ def train(self,M=4):
             self.G[i]=self.Weaker(self.X,self.y)
             e=self.G[i].train(self.W)
             #print self.G[i].t_val,self.G[i].t_b,e
-            self.alpha[i]=1/2*np.log((1-e)/e)
+            self.alpha[i]=float(1.0 / 2 * np.log((1 - e) / e))
             #print self.alpha[i]
             sg=self.G[i].pred(self.X)
             Z=self.W*np.exp(-self.alpha[i]*self.y*sg.transpose())
@@ -57,10 +59,14 @@ def finalclassifer(self,t):
         #pre_y=sign(sums)
         t=(pre_y!=self.y).sum()
         return t
+
     def pred(self,test_set):
-        sums=np.zeros(self.y.shape)
+        test_set=np.array(test_set)
+        assert test_set.shape[0]==self.X.shape[0]
+        sums=np.zeros((test_set.shape[1],1)).flatten(1)
+
         for i in range(self.Q+1):
-            sums=sums+self.G[i].pred(self.X).flatten(1)*self.alpha[i]
+            sums=sums+self.G[i].pred(test_set).flatten(1)*self.alpha[i]
         #print sums
         pre_y=sign(sums)
-        return pre_y
+        return pre_y
\ No newline at end of file
diff --git a/dml/ADAB/adaBoost.py b/dml/ADAB/adaBoost.py
index 74a59ad..bbeba7b 100644
--- a/dml/ADAB/adaBoost.py
+++ b/dml/ADAB/adaBoost.py
@@ -34,7 +34,7 @@ def train(self,M=4):
             self.G[i]=self.Weaker(self.X,self.y)
             e=self.G[i].train(self.W)
             #print self.G[i].t_val,self.G[i].t_b,e
-            self.alpha[i]=1/2*np.log((1-e)/e)
+            self.alpha[i]=float(1.0 / 2 * np.log((1 - e) / e))
             #print self.alpha[i]
             sg=self.G[i].pred(self.X)
             Z=self.W*np.exp(-self.alpha[i]*self.y*sg.transpose())
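
The key change here is a Python 2 integer-division fix: under Python 2, 1/2 evaluates to 0, so every round's classifier weight alpha_m = 1/2 * ln((1 - e_m)/e_m) silently came out zero and the ensemble never learned; 1.0 / 2 forces float division, and the float(...) wrapper coerces the numpy result to a plain scalar. The other hunks let pred() score an arbitrary test_set (previously it always re-scored the training data self.X) and add shape asserts. A standalone sketch of the failure mode, for illustration only (// reproduces Python 2's division semantics under Python 3):

    import numpy as np

    e = 0.3                                    # weighted error of one weak classifier
    alpha_bug = 1 // 2 * np.log((1 - e) / e)   # 0 * 0.847... == 0.0: the old behaviour
    alpha_fix = 1.0 / 2 * np.log((1 - e) / e)  # ~0.4236, the intended AdaBoost weight
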
From 032823bfaf2a24f43201584f21b4c4f361de806f Mon Sep 17 00:00:00 2001
From: DarkScope
Date: Sat, 30 Dec 2017 14:26:28 +0800
Subject: [PATCH 3/4] Create mcmc.py

mcmc
---
 dml/tool/mcmc.py | 111 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 dml/tool/mcmc.py

diff --git a/dml/tool/mcmc.py b/dml/tool/mcmc.py
new file mode 100644
index 0000000..29c9fb2
--- /dev/null
+++ b/dml/tool/mcmc.py
@@ -0,0 +1,111 @@
+from matplotlib import pyplot as plt
+import numpy as np
+from mpl_toolkits.mplot3d import Axes3D
+from matplotlib import cm
+import math
+PI = 3.14159265358979323846
+def domain_random():
+    return np.random.random()*3.8-1.9
+def get_p(x):
+    # return 1/(2*PI)*np.exp(- x[0]**2 - x[1]**2)
+    return 1/(2*PI*math.sqrt(1-0.25))*np.exp(-1/(2*(1-0.25))*(x[0]**2 -x[0]*x[1] + x[1]**2))
+
+def get_tilde_p(x):
+    return get_p(x)*20
+
+def partialSampler(x,dim):
+    xes = []
+    for t in range(10):
+        xes.append(domain_random())
+    tilde_ps = []
+    for t in range(10):
+        tmpx = x[:]
+        tmpx[dim] = xes[t]
+        tilde_ps.append(get_tilde_p(tmpx))
+
+    norm_tilde_ps = np.asarray(tilde_ps)/sum(tilde_ps)
+    u = np.random.random()
+    sums = 0.0
+    for t in range(10):
+        sums += norm_tilde_ps[t]
+        if sums>=u:
+            return xes[t]
+
+
+def plotContour(plot = False):
+    X = np.arange(-2, 2, 0.05)
+    Y = np.arange(-2, 2, 0.05)
+    X, Y = np.meshgrid(X, Y)
+    Z = get_p([X,Y])
+    plt.contourf(X, Y, Z, 100, alpha = 1.0, cmap =cm.coolwarm)
+
+    # plt.contour(X, Y, Z, 7, colors = 'black', linewidth = 0.01)
+    if plot:
+        plt.show()
+
+def plot3D():
+    X = np.arange(-2, 2, 0.05)
+    Y = np.arange(-2, 2, 0.05)
+    X, Y = np.meshgrid(X, Y)
+    Z = get_p([X,Y])
+    fig = plt.figure()
+    ax = Axes3D(fig)
+    ax.grid(False)
+    ax.w_yaxis.set_pane_color((1,1,1,0))
+    ax.w_xaxis.set_pane_color((1,1,1,1))
+    ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap=cm.coolwarm)
+    plt.show()
+
+# plotContour()
+# plot3D()
+
+def metropolis(x):
+    new_x = (domain_random(),domain_random())
+    acc = min(1,get_tilde_p((new_x[0],new_x[1]))/get_tilde_p((x[0],x[1])))
+    u = np.random.random()
+    if u<acc:
+        return new_x
+    return None

[... the remaining ~40 of mcmc.py's 111 lines are missing from this copy; the two lines after "if u<acc" above are reconstructed from context ...]
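
For context: get_p is the density of a zero-mean bivariate Gaussian with unit variances and correlation 0.5 (hence the 1 - 0.25 factors), get_tilde_p scales it by 20 to play the role of an unnormalized target, metropolis() performs one Metropolis step with an independent uniform proposal on [-1.9, 1.9]^2 (returning the proposal on acceptance, None on rejection), and partialSampler() approximates a Gibbs conditional by normalizing the target over ten random candidates for one coordinate. Since the file's driver code is cut off above, here is a hypothetical usage loop; run_chain is not from the file and assumes mcmc.py's definitions are in scope:

    import numpy as np

    def run_chain(steps=1000):
        x = (domain_random(), domain_random())   # random start inside the square
        samples = [x]
        for _ in range(steps):
            new_x = metropolis(x)                # None signals a rejected proposal
            if new_x is not None:
                x = new_x
            samples.append(x)                    # a rejection repeats the current state
        return np.array(samples)

    # The empirical covariance of run_chain(5000) should approach
    # [[1, 0.5], [0.5, 1]], up to the truncation of the sampling domain.
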
[... the "From" commit-header lines of the next patch are also missing from this copy ...]
Date: Tue, 15 Jun 2021 22:45:10 +1000
Subject: [PATCH 4/4] docs: fix a few simple typos

There are small typos in:
- README.md
- dml/CNN/CNN.py
- dml/DT/decisionTree.py
- dml/LR/logisticRegression.py
- dml/NB/naiveBayesian.py
- dml/NN/neuralNetwork.py
- test/naiveBayesian/nb_test.py

Fixes:
- Should read `continuous` rather than `continous`.
- Should read `discreet` rather than `descret`.
- Should read `actually` rather than `actualy`.
- Should read `referred` rather than `refered`.
- Should read `recommend` rather than `recommand`.

Closes #2
---
 README.md                     | 4 ++--
 dml/CNN/CNN.py                | 2 +-
 dml/DT/decisionTree.py        | 2 +-
 dml/LR/logisticRegression.py  | 2 +-
 dml/NB/naiveBayesian.py       | 6 +++---
 dml/NN/neuralNetwork.py       | 2 +-
 test/naiveBayesian/nb_test.py | 2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index bcf9282..0355125 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ Code Files
 
 `./dml/NN` -the code of **Neural NetWorks**
 
-`./dml/LR` -**Logistic Regression**,actualy It's **softmax**
+`./dml/LR` -**Logistic Regression**,actually It's **softmax**
 
 `./dml/DT` -**Decision Tree** , CART algorithm
 
@@ -21,7 +21,7 @@ Code Files
 
 `./dml/KNN` -the **k-Nearest Neighbor** algorithm(kd-tree BBF implementing)
 
-`./dml/NB` -the **naive Bayesian** support both continous and descrete features
+`./dml/NB` -the **naive Bayesian** support both continuous and descrete features
 
 `./dml/SVM` -the basic binary **Support Vector Machine**
diff --git a/dml/CNN/CNN.py b/dml/CNN/CNN.py
index f2788a0..ad54858 100644
--- a/dml/CNN/CNN.py
+++ b/dml/CNN/CNN.py
@@ -5,7 +5,7 @@
 from dml.tool import sigmoid,expand,showimage
 from numpy import rot90
 '''
-    this algorithm have refered to the DeepLearnToolBox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
+    this algorithm have referred to the DeepLearnToolBox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
     also:[1]:"Notes on Convolutional Neural Networks" Jake Bouvrie 2006 - How to implement CNNs
     I want to implement as [1] described,where the subsampling layer have sigmoid function
     but finally it does not converge,but I can pass the gradcheck!!
diff --git a/dml/DT/decisionTree.py b/dml/DT/decisionTree.py
index 7765e0b..cddae99 100644
--- a/dml/DT/decisionTree.py
+++ b/dml/DT/decisionTree.py
@@ -29,7 +29,7 @@ def __init__(self,X,y,property=None):
             so If your X have some string parameter,all thing will translate to string
             in this situation,you can't have continuous parameter
             so remember:
-                if you have continous parameter,DON'T PUT any STRING IN X !!!!!!!!
+                if you have continuous parameter,DON'T PUT any STRING IN X !!!!!!!!
         '''
         self.X=np.array(X).transpose()
         self.y=np.array(y).flatten(1)
diff --git a/dml/LR/logisticRegression.py b/dml/LR/logisticRegression.py
index 4c00236..e2de04c 100644
--- a/dml/LR/logisticRegression.py
+++ b/dml/LR/logisticRegression.py
@@ -12,7 +12,7 @@ class LRC:
         X   -is a N*M matrix
         y   -is a M vector
         lam -is the parameter lambda for LR penalty
-        actualy it's a softmax......=.=
+        actually it's a softmax......=.=
         but they are same when the class_number is 2
         also see ufldl:
         http://ufldl.stanford.edu/wiki/index.php/Softmax_Regression
diff --git a/dml/NB/naiveBayesian.py b/dml/NB/naiveBayesian.py
index 3af8cde..0325334 100644
--- a/dml/NB/naiveBayesian.py
+++ b/dml/NB/naiveBayesian.py
@@ -9,10 +9,10 @@ class NBC:
     def __init__(self,X,y,Indicator=None):
         '''
             X a N*M matrix where M is the train case number
-                should be number both continous and descret feature
+                should be number both continuous and discreet feature
             y class label for classification
-            Indicator show whether the feature is continous(0) or descret(1)
-                continous in default
+            Indicator show whether the feature is continuous(0) or discreet(1)
+                continuous in default
         '''
         self.X=np.array(X)
diff --git a/dml/NN/neuralNetwork.py b/dml/NN/neuralNetwork.py
index 78763d4..b4fef25 100644
--- a/dml/NN/neuralNetwork.py
+++ b/dml/NN/neuralNetwork.py
@@ -3,7 +3,7 @@
     DeepLearnToolbox(https://github.com/rasmusbergpalm/DeepLearnToolbox)
     I think the whole architecture of it is clear and easy to understand
     so I copy it to python
-    I also recommand UFLDL(http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial)
+    I also recommend UFLDL(http://ufldl.stanford.edu/wiki/index.php/UFLDL_Tutorial)
     to learn Neural Network
 
     TODO:SAE,DAE and so on
diff --git a/test/naiveBayesian/nb_test.py b/test/naiveBayesian/nb_test.py
index 001a45a..50e9763 100644
--- a/test/naiveBayesian/nb_test.py
+++ b/test/naiveBayesian/nb_test.py
@@ -5,7 +5,7 @@
 import scipy as sp
 from dml.NB import NBC
 '''
-    Example of descret data from <统计学习方法>
+    Example of discreet data from <统计学习方法>
     S=0 M=1 L=2