Skip to content

Commit 9aba8a2

Browse files
author
liwei
committed
pca and cur for dim-reduction
1 parent 01cbc49 commit 9aba8a2

File tree

1 file changed

+116
-14
lines changed

1 file changed

+116
-14
lines changed

dimension_reduction.py

Lines changed: 116 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,29 @@
33
# chapter 11
44

55
import math
6+
import itertools
67
import numpy as np
8+
def take(n, iterable):
    """Return the first ``n`` items of ``iterable`` as a list.

    Fewer than ``n`` items are returned when the iterable runs out first.
    """
    return list(itertools.islice(iterable, n))
710

811
EPSILON = 0.00001
912

13+
14+
def simple_M():
    """Return the 4x2 sample matrix used by ``test_pca``."""
    return np.array([[1, 2],
                     [2, 1],
                     [3, 4],
                     [4, 3]])
19+
def perfect_M():
    """Return the 7x5 sample matrix used by ``test_cur``.

    The first four rows are multiples of ``[1, 1, 1, 0, 0]`` and the last
    three are multiples of ``[0, 0, 0, 1, 1]``, so the matrix has rank 2.
    """
    M = [[1, 1, 1, 0, 0],
         [3, 3, 3, 0, 0],
         [4, 4, 4, 0, 0],
         [5, 5, 5, 0, 0],
         [0, 0, 0, 4, 4],
         [0, 0, 0, 5, 5],
         [0, 0, 0, 2, 2]]
    return np.array(M)
28+
1029
def perfect_data():
1130
U = np.array([[.14, .42, .56, .70, 0, 0, 0],
1231
[0, 0, 0, 0, .60, .75, .30]]).T
@@ -48,6 +67,10 @@ def power_iteration(M, max_loop=100, power_iteration_epsilon=EPSILON):
4867
n,m = M.shape
4968
x = np.ones(n)
5069

70+
# in case Mx = 0
71+
if np.sum(np.abs(np.dot(M, x))) <= power_iteration_epsilon:
72+
x[0] = -1.0
73+
5174
while max_loop >= 1:
5275
x_k = np.dot(M, x)
5376
x_k = x_k / (frobenius_norm(x_k) * 1.0)
@@ -61,8 +84,16 @@ def power_iteration(M, max_loop=100, power_iteration_epsilon=EPSILON):
6184
eigen_value = x.T.dot(M).dot(x)
6285
return eigen_vector, eigen_value
6386

64-
def eigen_solver(M, min_eigvalue=0.01, *args, **kw):
65-
"""Note M will be modified"""
87+
def pseudoinverse(M_orig, diag=True, eps=EPSILON):
    """Return the pseudoinverse of a diagonal matrix.

    Only the main diagonal of ``M_orig`` is read and ``M_orig`` itself is
    never modified.  The result is a new square diagonal matrix.

    M_orig -- 2-D array whose main diagonal holds the values to invert
    diag   -- must be true; the general (non-diagonal) case is not
              implemented
    eps    -- diagonal entries with ``abs(x) <= eps`` are treated as zero
              and map to 0.0; every other entry ``x`` maps to ``1.0 / x``

    Raises NotImplementedError when ``diag`` is false.
    """
    if not diag:
        raise NotImplementedError
    # Compare magnitudes so that negative entries are inverted as well, and
    # snap near-zero entries to exactly 0 instead of passing them through.
    return np.diag([0.0 if abs(x) <= eps else 1.0 / x
                    for x in M_orig.diagonal()])
93+
94+
95+
def eigen_solver(M_orig, min_eigvalue=0.01, *args, **kw):
96+
M = M_orig.copy()
6697
E = []
6798
Sigma = []
6899
while 1:
@@ -78,11 +109,16 @@ def eigen_solver(M, min_eigvalue=0.01, *args, **kw):
78109

79110
return np.array(E).T, np.diag(Sigma)
80111

112+
113+
def pca(M, *args, **kw):
    """Principal component analysis of ``M`` via eigenpairs of ``M^T M``.

    ``M`` is used as given; no mean-centering is performed here.  Extra
    positional and keyword arguments are forwarded to ``eigen_solver``.

    Returns ``(E, Sigma)`` exactly as produced by ``eigen_solver`` for
    ``M^T M``.
    """
    tmp = M.T.dot(M)
    E, Sigma = eigen_solver(tmp, *args, **kw)
    return E, Sigma
117+
81118
def svd(M, *args, **kw):
    """Singular value decomposition of ``M`` built on ``eigen_solver``.

    V comes from the eigenvectors of ``M^T M`` and U from the eigenvectors
    of ``M M^T``.  Extra positional and keyword arguments are forwarded to
    ``eigen_solver`` for both solves.

    Returns ``(U, Sigma, V.T)`` where ``Sigma`` is the element-wise square
    root of the eigenvalue matrix returned for ``M M^T``.
    """
    V, SigmaSquare = eigen_solver(M.T.dot(M), *args, **kw)
    # U must be solved from M M^T.  M^T M is symmetric, so transposing it
    # yields the same matrix and would make U a copy of V (and give U the
    # wrong number of rows whenever M is not square).
    U, SigmaSquare = eigen_solver(M.dot(M.T), *args, **kw)
    # NOTE(review): the two solves are independent, so the signs of matching
    # columns of U and V are not forced to agree -- confirm against callers.
    return U, np.sqrt(SigmaSquare), V.T
87123

88124

@@ -101,28 +137,61 @@ def _calc_prob(v, fnorm):
101137
return 1.0 * frobenius_norm_square(v) / fnorm
102138

103139

104-
def kbig(vec, k):
105-
if k <= 0:
106-
return None
107-
108-
for offset,value in enumerate(vec):
109-
pass
110-
111140
def _select_max_by_prob(M, r):
    """Select the ``r`` most probable rows and columns of ``M`` for CUR.

    Each column (row) gets the probability computed by ``_calc_prob`` from
    the squared Frobenius norm of ``M``.  The ``r`` columns and ``r`` rows
    with the largest probabilities are kept, most probable first; ties keep
    their original order.

    Returns ``(C, R, W)``:
    C -- the selected columns of ``M``, each divided by ``sqrt(r * prob)``
    R -- the selected rows of ``M``, each divided by ``sqrt(r * prob)``
    W -- the unscaled ``r x r`` intersection of the selected rows and
         columns, ordered like the rows of ``R`` and the columns of ``C``

    Raises ValueError when ``r`` exceeds the number of rows or columns.
    """
    n_rows, n_cols = M.shape
    if r > min(n_rows, n_cols):
        raise ValueError(
            "r must not exceed the number of rows or columns of M")

    fnorm = frobenius_norm_square(M)
    column_probs = np.apply_along_axis(_calc_prob, 0, M, fnorm)
    row_probs = np.apply_along_axis(_calc_prob, 1, M, fnorm)

    def top_indices(probs):
        # Stable descending sort: equal probabilities keep index order.
        ranked = sorted(enumerate(probs), key=lambda o: o[1], reverse=True)
        return [offset for offset, _ in take(r, ranked)]

    column_indices = top_indices(column_probs)
    row_indices = top_indices(row_probs)

    # W has to use the same ordering as R (rows) and C (columns); otherwise
    # the U factor derived from W pairs up the wrong rows and columns.
    W = M[np.ix_(row_indices, column_indices)]

    # Scale every selected column / row by sqrt(r * its probability).
    column_scale = np.sqrt(r * column_probs[column_indices])
    row_scale = np.sqrt(r * row_probs[row_indices])
    C = M[:, column_indices] / column_scale
    R = M[row_indices, :] / row_scale[:, np.newaxis]
    return C, R, W
118177

119178
def cur(M, r, select_method=MAX_BY_PROB, *args, **kw):
    """CUR decomposition.

    M is the matrix to be decomposed, r is the estimated rank of
    the matrix.

    select_method chooses how rows and columns are picked: RANDOMLY,
    RANDOM_BY_PROB or MAX_BY_PROB.  Extra positional and keyword arguments
    are accepted but currently unused.

    With ``W = X Sigma Y^T`` the middle factor is
    ``U = Y (Sigma^+)^2 X^T``, where ``Sigma^+`` is the pseudoinverse of
    ``Sigma``.

    Returns ``(C, U, R)``.  Raises KeyError for an unknown
    ``select_method``.
    """
    selectors = {
        RANDOMLY: _select_randomly,
        RANDOM_BY_PROB: _select_randomly_by_prob,
        MAX_BY_PROB: _select_max_by_prob,
    }
    C, R, W = selectors[select_method](M, r)

    X, Sigma, YT = svd(W)
    # Square of the pseudoinverse of the diagonal Sigma factor.
    Sigma = np.square(pseudoinverse(Sigma))
    U = YT.T.dot(Sigma).dot(X.T)
    return C, U, R
126195

127196
def test_eigen_solver():
128197
arr = np.array([[3, 2],
@@ -147,6 +216,39 @@ def test_svd():
147216
print "***************"
148217
print VT
149218

219+
def test_select_max_by_prob():
    """Print C, R and W selected from a 4x4 matrix holding 0..15 with r=2."""
    M = np.arange(0, 16).reshape(4, 4)
    print(M)
    C, R, W = _select_max_by_prob(M, 2)
    print("***************")
    print(C.shape)
    print(C)
    print("***************")
    print(R)
    print(R.shape)
    print("***************")
    print(W.shape)
    print(W)
232+
233+
def test_cur():
    """Print the C, U and R factors of ``perfect_M()`` for r=4."""
    C, U, R = cur(perfect_M(), 4)
    print(C)
    print(U)
    print(R)
238+
239+
def test_pca():
    """Print the pca() results for ``simple_M()`` and the product M.dot(E)."""
    M = simple_M()
    E, Sigma = pca(M)
    print(E)
    print("***************")
    print(Sigma)
    print("***************")
    print(M.dot(E))
247+
150248
#demo_querying_using_concepts()
151249
#test_eigen_solver()
152-
test_svd()
250+
#test_svd()
251+
#test_select_max_by_prob()
252+
#test_cur()
253+
#test_pca()
254+
#test_power_iteration()

0 commit comments

Comments
 (0)