|
4 | 4 | removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian, |
5 | 5 | dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement, |
6 | 6 | weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table, |
7 | | - open_data, sigmoid_derivative, probability |
| 7 | + open_data, sigmoid_derivative, probability, norm, matrix_multiplication |
8 | 8 | ) |
9 | 9 |
|
10 | 10 | import copy |
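The import hunk above pulls two more helpers, `norm` and `matrix_multiplication`, out of `utils`. For readers without the repo at hand, here is a minimal sketch of what those two helpers are assumed to compute; these are hypothetical pure-Python stand-ins, not the exact `utils` implementations (the real `matrix_multiplication` may, for instance, accept more than two factors):

```python
# Hypothetical stand-ins for utils.norm and utils.matrix_multiplication,
# shown only so the new code below is self-explanatory.

def norm(x, ord=2):
    """ord-norm of a vector: (sum(|x_i| ** ord)) ** (1 / ord)."""
    return sum(abs(xi) ** ord for xi in x) ** (1 / ord)

def matrix_multiplication(A, B):
    """Product of two matrices represented as lists of rows."""
    return [[sum(a * b for a, b in zip(row, col)) for col in zip(*B)]
            for row in A]
```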
@@ -377,6 +377,66 @@ def predict(example): |
377 | 377 | # ______________________________________________________________________________ |
378 | 378 |
|
379 | 379 |
|
| 380 | +def truncated_svd(X, num_val=2, max_iter=1000): |
| 381 | +    """Compute the first num_val singular values and vectors of X by power iteration.""" |
| 382 | + |
| 383 | +    def normalize_vec(v, p=2): |
| 384 | +        """Normalize the two halves of v (v[:m] and v[m:]) to unit p-norm.""" |
| 385 | +        v_m = v[:m] |
| 386 | +        v_n = v[m:] |
| 387 | +        norm_v_m = norm(v_m, p) |
| 388 | +        y_m = [x / norm_v_m for x in v_m] |
| 389 | +        norm_v_n = norm(v_n, p) |
| 390 | +        y_n = [x / norm_v_n for x in v_n] |
| 391 | +        return y_m + y_n |
| 392 | + |
| 393 | +    def remove_component(v): |
| 394 | +        """Deflation: project out of v the singular vectors found so far.""" |
| 395 | +        v_m = v[:m] |
| 396 | +        v_n = v[m:] |
| 397 | +        for eivec in eivec_m: |
| 398 | +            coeff = dotproduct(v_m, eivec) |
| 399 | +            v_m = [x1 - coeff * x2 for x1, x2 in zip(v_m, eivec)] |
| 400 | +        for eivec in eivec_n: |
| 401 | +            coeff = dotproduct(v_n, eivec) |
| 402 | +            v_n = [x1 - coeff * x2 for x1, x2 in zip(v_n, eivec)] |
| 403 | +        return v_m + v_n |
| 404 | + |
| 405 | +    m, n = len(X), len(X[0]) |
| 406 | +    A = [[0 for _ in range(n + m)] for _ in range(n + m)]  # symmetric block matrix [[0, X], [X^T, 0]] |
| 407 | +    for i in range(m): |
| 408 | +        for j in range(n): |
| 409 | +            A[i][m + j] = A[m + j][i] = X[i][j] |
| 410 | + |
| 411 | +    eivec_m = []  # left singular vectors found so far |
| 412 | +    eivec_n = []  # right singular vectors found so far |
| 413 | +    eivals = []  # corresponding singular values |
| 414 | + |
| 415 | +    for _ in range(num_val): |
| 416 | +        v = [random.random() for _ in range(m + n)]  # random start vector (length m + n) |
| 417 | +        v = remove_component(v) |
| 418 | +        v = normalize_vec(v) |
| 419 | + |
| 420 | +        for _ in range(max_iter): |
| 421 | +            old_v = v |
| 422 | +            v = matrix_multiplication(A, [[vi] for vi in v])  # power-iteration step: v <- A v |
| 423 | +            v = [vi[0] for vi in v]  # flatten the column matrix back to a vector |
| 424 | +            v = remove_component(v) |
| 425 | +            v = normalize_vec(v) |
| 426 | +            # stop once the iterate has numerically stopped changing |
| 427 | +            if norm([v1 - v2 for v1, v2 in zip(old_v, v)]) <= 1e-10: |
| 428 | +                break |
| 429 | + |
| 430 | +        projected_v = matrix_multiplication(A, [[vi] for vi in v]) |
| 431 | +        projected_v = [vi[0] for vi in projected_v]  # |A v| / |v| estimates the singular value |
| 432 | +        eivals.append(norm(projected_v, 1) / norm(v, 1)) |
| 433 | +        eivec_m.append(v[:m])  # left singular vector |
| 434 | +        eivec_n.append(v[m:])  # right singular vector |
| 435 | +    return (eivec_m, eivec_n, eivals) |
| 436 | + |
| 437 | +# ______________________________________________________________________________ |
| 438 | + |
| 439 | + |
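Why this construction works: for an m x n matrix X, the symmetric (m+n) x (m+n) block matrix A = [[0, X], [X^T, 0]] has its eigenpairs built from the singular triples of X. If (sigma, u, v) is a singular triple (X v = sigma u and X^T u = sigma v), then the stacked vectors (u, v) and (u, -v) are eigenvectors of A with eigenvalues sigma and -sigma. Power iteration on A, with the two halves normalized separately and previously found vectors deflated away by remove_component, therefore recovers the top singular vectors of X half by half.

As a quick sanity check, here is a hypothetical driver (it assumes this patch lands in learning.py so the function is importable from there; the matrix below is rank 1, so its only nonzero singular value is exactly 5):

```python
from learning import truncated_svd

# Rank-1 matrix: X = [1, 2]^T [1, 2], so its singular values are (5, 0).
X = [[1, 2],
     [2, 4]]

eivec_m, eivec_n, eivals = truncated_svd(X, num_val=1)
print(eivals[0])                # expected to be close to 5.0
print(eivec_m[0], eivec_n[0])   # both close to +/-[1, 2] / sqrt(5)
```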
380 | 440 | class DecisionFork: |
381 | 441 | """A fork of a decision tree holds an attribute to test, and a dict |
382 | 442 | of branches, one for each of the attribute's values.""" |
|