|
9 | 9 | from all_correlations import all_correlations |
10 | 10 | import numpy as np |
11 | 11 | from load_ml100k import load |
12 | | -def estimate_user(user, rest): |
| 12 | + |
| 13 | +def estimate_user(user, rest, num_neigbors=100): |
| 14 | + '''Estimate ratings for user from the rows of rest whose binary (rated / not rated) patterns correlate best with the user's |
| 15 | +
|
| 16 | + Returns |
| 17 | + ------- |
| 18 | + estimates: ndarray |
| 19 | + Returns a rating estimate for each movie |
| 20 | + ''' |
| 21 | + |
| 22 | + # Compute binary matrix correlations: |
13 | 23 | bu = user > 0 |
14 | 24 | br = rest > 0 |
15 | 25 | ws = all_correlations(bu, br) |
16 | | - selected = ws.argsort()[-100:] |
| 26 | + |
| 27 | + # Select top `num_neigbors`: |
| 28 | + selected = ws.argsort()[-num_neigbors:] |
| 29 | + |
| 30 | + # Use these to compute estimates: |
17 | 31 | estimates = rest[selected].mean(0) |
18 | 32 | estimates /= (.1 + br[selected].mean(0)) |
19 | 33 | return estimates |
20 | 34 |
|
21 | 35 |
|
22 | 36 | def train_test(user, rest): |
| 37 | + '''Train & test on a single user |
| 38 | +
|
| 39 | + Returns both the prediction error and the null error |
| 40 | + ''' |
23 | 41 | estimates = estimate_user(user, rest) |
24 | 42 | bu = user > 0 |
25 | 43 | br = rest > 0 |
@@ -49,7 +67,10 @@ def main(): |
49 | 67 | revs = (reviews > 0).sum(1) |
50 | 68 | err = np.array(err) |
51 | 69 | rmse = np.sqrt(err / revs[:, None]) |
| 70 | + print("Average of RMSE / Null-model RMSE") |
52 | 71 | print(np.mean(rmse, 0)) |
| 72 | + print() |
| 73 | + print("Average of RMSE / Null-model RMSE (users with more than 60 reviewed movies)") |
53 | 74 | print(np.mean(rmse[revs > 60], 0)) |
54 | 75 |
|
55 | 76 | if __name__ == '__main__': |
|
0 commit comments