DOC Better documentation

luispedro · luispedro · commit f416def9ec8e · 2014-08-20T14:38:20.000+02:00
diff --git a/ch08/corrneighbours.py b/ch08/corrneighbours.py
@@ -9,17 +9,35 @@
 from all_correlations import all_correlations
 import numpy as np
 from load_ml100k import load
-def estimate_user(user, rest):
+
+def estimate_user(user, rest, num_neigbors=100):
+    '''Estimate ratings for user from neighbours with correlated binary ratings
+
+    Returns
+    -------
+    estimates: ndarray
+        Returns a rating estimate for each movie
+    '''
+
+    # Compute binary matrix correlations:
     bu = user > 0
     br = rest > 0
     ws = all_correlations(bu, br)
-    selected = ws.argsort()[-100:]
+
+    # Select the `num_neigbors` most correlated rows of `rest`:
+    selected = ws.argsort()[-num_neigbors:]
+
+    # Mean neighbour rating per movie, divided by the fraction who rated it:
     estimates = rest[selected].mean(0)
     estimates /= (.1 + br[selected].mean(0))
     return estimates
 
 
 def train_test(user, rest):
+    '''Train & test on a single user
+
+    Returns both the prediction error and the null error
+    '''
     estimates = estimate_user(user, rest)
     bu = user > 0
     br = rest > 0
@@ -49,7 +67,10 @@ def main():
     revs = (reviews > 0).sum(1)
     err = np.array(err)
     rmse = np.sqrt(err / revs[:, None])
+    print("Average of RMSE / Null-model RMSE")
     print(np.mean(rmse, 0))
+    print()
+    print("Average of RMSE / Null-model RMSE (users with more than 60 reviewed movies)")
     print(np.mean(rmse[revs > 60], 0))
 
 if __name__ == '__main__':