|
2 | 2 |
|
3 | 3 | class NormalizePositive(object): |
4 | 4 |
|
def __init__(self, axis=0):
    """Remember which axis the normalization statistics are computed along.

    Parameters
    ----------
    axis : int, optional
        Stored unchanged on the instance. The other methods transpose
        their input when this equals 1; any other value leaves the
        input orientation untouched.
    """
    self.axis = axis
| 7 | + |
def fit(self, features, y=None):
    """Estimate the mean and regularized std of the positive entries.

    Only entries strictly greater than zero are treated as observed
    values; everything else is ignored when computing the statistics.

    Parameters
    ----------
    features : array
        Two-dimensional NumPy array (assumed; it must support ``.T``,
        comparison and ``sum(axis=0)``). When ``self.axis == 1`` it is
        transposed first so that statistics are computed per row of the
        original input.
    y : ignored
        Accepted for API compatibility; never read.

    Returns
    -------
    self
        The fitted instance, with ``self.mean`` and ``self.std`` set.
    """
    # All arithmetic below runs along axis 0, so flip the data when the
    # caller asked for statistics along axis 1.
    if self.axis == 1:
        features = features.T
    binary = (features > 0)

    # Number of positive entries per column.
    count = binary.sum(axis=0)

    # to avoid division by zero, set zero counts to one:
    count[count == 0] = 1.

    # Sum only the positive entries so that the numerator matches the
    # denominator; summing every entry would let negative values skew a
    # mean that is divided by the number of positive entries only.
    self.mean = np.where(binary, features, 0).sum(axis=0) / count

    # Compute variance by average squared difference to the mean, but only
    # consider differences where binary is True (i.e., where there was a
    # true rating):
    diff = (features - self.mean) * binary
    diff **= 2
    # regularize the estimate of std by adding 0.1
    self.std = np.sqrt(0.1 + diff.sum(axis=0) / count)
    return self
23 | 28 |
|
def transform(self, features):
    """Standardize the positive entries and zero out everything else.

    Uses ``self.mean`` and ``self.std`` as set by ``fit``. The input is
    transposed before and after the arithmetic when ``self.axis == 1``,
    so the result has the same orientation as the input. The input
    array itself is not modified: the subtraction creates a new array.
    """
    flip = self.axis == 1
    data = features.T if flip else features

    # Mask of entries that count as observed (strictly positive).
    observed = (data > 0)

    scaled = data - self.mean
    scaled /= self.std
    # Entries that were not positive in the input become zero.
    scaled *= observed

    return scaled.T if flip else scaled
30 | 39 |
|
def inverse_transform(self, features, copy=True):
    """Undo the scaling of ``transform``: multiply by std, then add mean.

    Every entry is rescaled; no positivity mask is applied here.

    Parameters
    ----------
    features : array
        Array with ``dtype``, ``copy`` and ``.T`` (a NumPy array is
        assumed). Transposed before and after the arithmetic when
        ``self.axis == 1``.
    copy : bool, optional
        If true (default) the input is left untouched and a new array
        is returned. If false the arithmetic is done in place on the
        caller's array.

    Returns
    -------
    array
        The rescaled values, in the same orientation as the input.
    """
    if copy:
        if features.dtype.kind in "biu":
            # Boolean/integer buffers cannot hold the rescaled values, and
            # the in-place ops below would raise a casting error, so
            # promote to float while copying.
            features = features.astype(float)
        else:
            features = features.copy()
    if self.axis == 1:
        # .T is a view, so the in-place updates still land in `features`.
        features = features.T
    features *= self.std
    features += self.mean
    if self.axis == 1:
        features = features.T
    return features
37 | 50 |
|
38 | 51 | def fit_transform(self, features): |
|
0 commit comments