3
3
# chapter 11
4
4
5
5
import math
6
+ import itertools
6
7
import numpy as np
8
+ def take (n , iterable ):
9
+ return list (itertools .islice (iterable , n ))
7
10
8
11
EPSILON = 0.00001
9
12
13
+
14
+ def simple_M ():
15
+ return np .array ([[1 , 2 ],
16
+ [2 , 1 ],
17
+ [3 , 4 ],
18
+ [4 , 3 ]])
19
+ def perfect_M ():
20
+ M = [[1 , 1 , 1 , 0 , 0 ],
21
+ [3 , 3 , 3 , 0 , 0 ],
22
+ [4 , 4 , 4 , 0 , 0 ],
23
+ [5 , 5 , 5 , 0 , 0 ],
24
+ [0 , 0 , 0 , 4 , 4 ],
25
+ [0 , 0 , 0 , 5 , 5 ],
26
+ [0 , 0 , 0 , 2 , 2 ]]
27
+ return np .array (M )
28
+
10
29
def perfect_data ():
11
30
U = np .array ([[.14 , .42 , .56 , .70 , 0 , 0 , 0 ],
12
31
[0 , 0 , 0 , 0 , .60 , .75 , .30 ]]).T
@@ -48,6 +67,10 @@ def power_iteration(M, max_loop=100, power_iteration_epsilon=EPSILON):
48
67
n ,m = M .shape
49
68
x = np .ones (n )
50
69
70
+ # in case Mx = 0
71
+ if np .sum (np .abs (np .dot (M , x ))) <= power_iteration_epsilon :
72
+ x [0 ] = - 1.0
73
+
51
74
while max_loop >= 1 :
52
75
x_k = np .dot (M , x )
53
76
x_k = x_k / (frobenius_norm (x_k ) * 1.0 )
@@ -61,8 +84,16 @@ def power_iteration(M, max_loop=100, power_iteration_epsilon=EPSILON):
61
84
eigen_value = x .T .dot (M ).dot (x )
62
85
return eigen_vector , eigen_value
63
86
64
- def eigen_solver (M , min_eigvalue = 0.01 , * args , ** kw ):
65
- """Note M will be modified"""
87
+ def pseudoinverse (M_orig , diag = True , eps = EPSILON ):
88
+ M = M_orig .copy ()
89
+ if diag :
90
+ return np .diag ([x if x <= eps else 1.0 / x for x in M .diagonal ()])
91
+ else :
92
+ raise NotImplementedError
93
+
94
+
95
+ def eigen_solver (M_orig , min_eigvalue = 0.01 , * args , ** kw ):
96
+ M = M_orig .copy ()
66
97
E = []
67
98
Sigma = []
68
99
while 1 :
@@ -78,11 +109,16 @@ def eigen_solver(M, min_eigvalue=0.01, *args, **kw):
78
109
79
110
return np .array (E ).T , np .diag (Sigma )
80
111
112
+
113
+ def pca (M , * args , ** kw ):
114
+ tmp = M .T .dot (M )
115
+ E , Sigma = eigen_solver (tmp , * args , ** kw )
116
+ return E , Sigma
117
+
81
118
def svd (M , * args , ** kw ):
82
119
tmp = M .T .dot (M )
83
120
V , SigmaSquare = eigen_solver (tmp , * args , ** kw )
84
- tmp = M .dot (M .T )
85
- U , SigmaSquare = eigen_solver (tmp , * args , ** kw )
121
+ U , SigmaSquare = eigen_solver (tmp .T , * args , ** kw )
86
122
return U , np .sqrt (SigmaSquare ), V .T
87
123
88
124
@@ -101,28 +137,61 @@ def _calc_prob(v, fnorm):
101
137
return 1.0 * frobenius_norm_square (v ) / fnorm
102
138
103
139
104
- def kbig (vec , k ):
105
- if k <= 0 :
106
- return None
107
-
108
- for offset ,value in enumerate (vec ):
109
- pass
110
-
111
140
def _select_max_by_prob (M , r ):
112
141
fnorm = frobenius_norm_square (M )
113
142
# columns
114
143
column_probs = np .apply_along_axis (_calc_prob , 0 , M , fnorm )
115
144
# rows
116
145
row_probs = np .apply_along_axis (_calc_prob , 1 , M , fnorm )
146
+ column_indices = [offset for offset ,_ in take (r ,
147
+ sorted (enumerate (column_probs ),
148
+ key = lambda o : o [1 ],
149
+ reverse = True ))]
150
+ row_indices = [offset for offset ,_ in take (r ,
151
+ sorted (enumerate (row_probs ),
152
+ key = lambda o : o [1 ],
153
+ reverse = True ))]
154
+ W = np .array ([M [i ,j ] for i in reversed (row_indices )\
155
+ for j in reversed (column_indices )]).reshape (r ,r )
156
+ # The following code actually do the same thing using itertools.
157
+ #W = M[zip(*itertools.product(reversed(row_indices),
158
+ #reversed(column_indices)))].reshape(r,r)
117
159
160
+ MT = M .T .copy ()
161
+ CT = []
162
+ for column_indice in column_indices :
163
+ prob = column_probs [column_indice ]
164
+ expected_value = math .sqrt (r * prob )
165
+ CT .append (np .divide (MT [column_indice ], expected_value ))
166
+
167
+ C = np .array (CT ).T
168
+
169
+ R = []
170
+ for row_indice in row_indices :
171
+ prob = row_probs [row_indice ]
172
+ expected_value = math .sqrt (r * prob )
173
+ R .append (np .divide (M [row_indice ], expected_value ))
174
+
175
+ R = np .array (R )
176
+ return C , R , W
118
177
119
178
def cur (M , r , select_method = MAX_BY_PROB , * args , ** kw ):
120
179
"""CUR decomposition
121
180
M is the matrix to be decomposed, r is the estimated rankd of
122
181
the matrix"""
123
- def select_cr ():
124
- if select_method =
125
182
183
+ def _select_cr ():
184
+ return {
185
+ RANDOMLY : _select_randomly ,
186
+ RANDOM_BY_PROB : _select_randomly_by_prob ,
187
+ MAX_BY_PROB : _select_max_by_prob ,
188
+ }[select_method ](M , r )
189
+
190
+ C , R , W = _select_cr ()
191
+ X , Sigma , YT = svd (W )
192
+ Sigma = np .square (pseudoinverse (Sigma ))
193
+ U = YT .T .dot (Sigma ).dot (X .T )
194
+ return C , U , R
126
195
127
196
def test_eigen_solver ():
128
197
arr = np .array ([[3 , 2 ],
@@ -147,6 +216,39 @@ def test_svd():
147
216
print "***************"
148
217
print VT
149
218
219
+ def test_select_max_by_prob ():
220
+ M = np .arange (0 , 16 ).reshape (4 ,4 )
221
+ print M
222
+ C , R , W = _select_max_by_prob (M , 2 )
223
+ print "***************"
224
+ print C .shape
225
+ print C
226
+ print "***************"
227
+ print R
228
+ print R .shape
229
+ print "***************"
230
+ print W .shape
231
+ print W
232
+
233
+ def test_cur ():
234
+ C , U , R = cur (perfect_M (), 4 )
235
+ print C
236
+ print U
237
+ print R
238
+
239
+ def test_pca ():
240
+ M = simple_M ()
241
+ E , Sigma = pca (M )
242
+ print E
243
+ print "***************"
244
+ print Sigma
245
+ print "***************"
246
+ print M .dot (E )
247
+
150
248
#demo_querying_using_concepts()
151
249
#test_eigen_solver()
152
- test_svd ()
250
+ #test_svd()
251
+ #test_select_max_by_prob()
252
+ #test_cur()
253
+ #test_pca()
254
+ #test_power_iteration()
0 commit comments