Skip to content

Commit c5f1dab

Browse files
committed
ENH Use dense arrays throughout
Introducing sparse matrices is an unnecessary complication
1 parent 8bfcb5b commit c5f1dab

File tree

7 files changed

+25
-25
lines changed

7 files changed

+25
-25
lines changed

ch08/corrneighbours.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,13 @@ def train_test(user, rest):
4949

5050

5151
def all_estimates(reviews):
    """Estimate every row of `reviews` from all the other rows.

    `reviews` is a dense 2-D array. For each row index, `estimate_user`
    is called with that row and with a copy of the matrix that has the
    row removed; the result is stored in the matching row of the output.

    Returns an array with the same shape and dtype as `reviews`.
    """
    n_rows = reviews.shape[0]
    estimates = np.zeros_like(reviews)
    for row in range(n_rows):
        # Leave the current row out so it cannot inform its own estimate.
        others = np.delete(reviews, row, 0)
        estimates[row] = estimate_user(reviews[row], others)
    return estimates
5756

5857
def main():
5958
reviews = load()
60-
reviews = reviews.toarray()
6159

6260
err = []
6361
for i in range(reviews.shape[0]):
@@ -67,11 +65,16 @@ def main():
6765
revs = (reviews > 0).sum(1)
6866
err = np.array(err)
6967
rmse = np.sqrt(err / revs[:, None])
68+
69+
rmse_model, rmse_null = np.mean(rmse, 0)
70+
7071
print("Average of RMSE / Null-model RMSE")
71-
print(np.mean(rmse, 0))
72+
print("{:.2}\t{:.2} (improvement: {:.1%}".format(rmse_model, rmse_null, (rmse_null-rmse_model)/rmse_null))
7273
print()
74+
75+
rmse_model, rmse_null = np.mean(rmse[revs > 60], 0)
7376
print("Average of RMSE / Null-model RMSE (users with more than 60 reviewed movies)")
74-
print(np.mean(rmse[revs > 60], 0))
77+
print("{:.2}\t{:.2} (improvement: {:.1%}".format(rmse_model, rmse_null, (rmse_null-rmse_model)/rmse_null))
7578

7679
if __name__ == '__main__':
7780
main()

ch08/figure3.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from load_ml100k import load
99
from matplotlib import pyplot as plt
1010
data = load()
11-
data = data.toarray()
1211
plt.gray()
1312
plt.imshow(data[:200, :200], interpolation='nearest')
1413
plt.xlabel('User ID')

ch08/load_ml100k.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ def load():
1717
ij -= 1 # original data is in 1-based system
1818
values = data[:, 2]
1919
reviews = sparse.csc_matrix((values, ij.T)).astype(float)
20-
return reviews
20+
return reviews.toarray()

ch08/similar_movie.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def all_estimates(reviews, k=1):
6565

6666
if __name__ == '__main__':
6767
from load_ml100k import load
68-
reviews = load().torarray()
68+
reviews = load()
6969
estimates = all_estimates(reviews)
7070
error = (estimates - reviews)
7171
error **= 2

ch08/stacked.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,6 @@
2121
similar_movie.all_estimates(reviews),
2222
])
2323

24-
reviews = reviews.toarray()
25-
26-
2724
total_error = 0.0
2825
coefficients = []
2926
for u in range(reviews.shape[0]):

ch08/stacked5.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111
import similar_movie
1212
import usermodel
1313

14-
sreviews = load()
15-
reviews = sreviews.toarray()
14+
reviews = load()
1615
# Collect several estimates
1716
es = np.array([
18-
usermodel.all_estimates(sreviews),
17+
usermodel.all_estimates(reviews),
1918
similar_movie.all_estimates(reviews, k=1),
2019
similar_movie.all_estimates(reviews, k=2),
2120
similar_movie.all_estimates(reviews, k=3),

ch08/usermodel.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,42 +6,43 @@
66
# It is made available under the MIT License
77

88
import numpy as np
9-
from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV
9+
from sklearn.linear_model import ElasticNetCV
1010
from sklearn.cross_validation import KFold
11-
from load_ml100k import load
1211

1312

1413
def _center_rows(rows):
    # Return a copy of `rows` in which every positive entry has had the mean
    # of its row's positive entries subtracted, together with those means.
    centred = rows.copy()
    means = np.array([row[row > 0].mean() for row in centred])
    # A row without any positive entry has a NaN mean; treat it as 0.
    means = np.nan_to_num(means)
    centred -= (centred > 0) * means[:, None]
    return centred, means


def learn_for(reviews, i):
    """Cross-validated ElasticNet predictions for row `i` of `reviews`.

    `reviews` is a dense 2-D array of shape (nusers, nmovies); entries
    greater than zero are treated as present ratings. For each movie that
    user `i` rated, the rating is predicted by a model fitted (4-fold
    cross-validation over those movies) on the ratings given by all the
    other users.

    Returns a 1-D array of length nmovies holding the out-of-fold
    predictions at the positions user `i` rated and 0 everywhere else.
    """
    reg = ElasticNetCV(fit_intercept=True, alphas=[
        0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    nmovies = reviews.shape[1]
    ratings = reviews[i].ravel()
    # Every row index except the target user's.
    others = np.delete(np.arange(reviews.shape[0]), i)
    # Column indices of the movies the target user rated.
    ps, = np.where(ratings > 0)
    # One row per rated movie, one column per other user.
    x = reviews[others][:, ps].T
    # Regression targets aligned with the rows of `x`. The fold indices
    # below index into `ps`, not into the full movie axis, so the target
    # must be taken from `ratings[ps]` rather than from `ratings` itself.
    y = ratings[ps]
    kf = KFold(len(ps), n_folds=4)
    predictions = np.zeros(len(ps))
    for train, test in kf:
        xc, x1 = _center_rows(x[train])
        reg.fit(xc, y[train] - x1)

        xc, _ = _center_rows(x[test])
        # NOTE(review): the per-movie mean is not added back, so predictions
        # stay in mean-centred units -- confirm callers expect this.
        predictions[test] = reg.predict(xc).ravel()
    fill_preds = np.zeros(nmovies)
    fill_preds[ps] = predictions
    return fill_preds
4546

4647

4748
def all_estimates(reviews):
@@ -50,3 +51,4 @@ def all_estimates(reviews):
5051
s = learn_for(reviews, i)
5152
whole_data.append(s)
5253
return np.array(whole_data)
54+

0 commit comments

Comments
 (0)