|
1 | | -import operator |
| 1 | +"""Determination of parameter bounds""" |
| 2 | +# Author: Paolo Losi |
| 3 | +# License: BSD 3 clause |
| 4 | + |
2 | 5 | import numpy as np |
3 | 6 |
|
| 7 | +from ..preprocessing import LabelBinarizer |
| 8 | +from ..utils.validation import check_consistent_length, check_array |
| 9 | +from ..utils.extmath import safe_sparse_dot |
| 10 | + |
4 | 11 |
|
5 | 12 | def l1_min_c(X, y, loss='l2', fit_intercept=True, intercept_scaling=1.0): |
6 | 13 | """ |
@@ -41,44 +48,23 @@ def l1_min_c(X, y, loss='l2', fit_intercept=True, intercept_scaling=1.0): |
41 | 48 | l1_min_c: float |
42 | 49 | minimum value for C |
43 | 50 | """ |
44 | | - import scipy.sparse as sp |
45 | 51 |
|
46 | 52 | if loss not in ('l2', 'log'): |
47 | 53 | raise ValueError('loss type not in ("l2", "log")') |
48 | 54 |
|
49 | | - y = np.asarray(y) |
50 | | - |
51 | | - if sp.issparse(X): |
52 | | - X = sp.csc_matrix(X) |
53 | | - hstack = sp.hstack |
54 | | - dot = operator.mul |
55 | | - else: |
56 | | - X = np.asarray(X) |
57 | | - hstack = np.hstack |
58 | | - dot = np.dot |
| 55 | + X = check_array(X, accept_sparse='csc') |
| 56 | + check_consistent_length(X, y) |
59 | 57 |
|
| 58 | + Y = LabelBinarizer(neg_label=-1).fit_transform(y).T |
| 59 | + # maximum absolute value over classes and features |
| 60 | + den = np.max(np.abs(safe_sparse_dot(Y, X))) |
60 | 61 | if fit_intercept: |
61 | 62 | bias = intercept_scaling * np.ones((np.size(y), 1)) |
62 | | - X = hstack((X, bias)) |
63 | | - |
64 | | - classes = np.unique(y) |
65 | | - n_classes = np.size(classes) |
66 | | - if n_classes <= 2: |
67 | | - c = classes[0] |
68 | | - y = y.reshape((1, -1)) |
69 | | - _y = np.empty(y.shape) |
70 | | - _y[y == c] = 1 |
71 | | - _y[y != c] = -1 |
72 | | - else: |
73 | | - _y = np.empty((n_classes, np.size(y))) |
74 | | - for i, c in enumerate(classes): |
75 | | - _y[i, y == c] = 1 |
76 | | - _y[i, y != c] = -1 |
77 | | - |
78 | | - den = np.max(np.abs(dot(_y, X))) |
| 63 | + den = max(den, abs(np.dot(Y, bias)).max()) |
79 | 64 |
|
80 | 65 | if den == 0.0: |
81 | | - raise ValueError('Ill-posed l1_min_c calculation') |
| 66 | + raise ValueError('Ill-posed l1_min_c calculation: l1 will always ' |
| 67 | + 'select zero coefficients for this data') |
82 | 68 | if loss == 'l2': |
83 | 69 | return 0.5 / den |
84 | 70 | else: # loss == 'log': |
|
0 commit comments