Commit 70f4d47

Merge pull request scikit-learn#3401 from vene/scorer_weights
[MRG] scorer: add sample_weight support (+test)
2 parents 4ec8630 + 6a4aa1d commit 70f4d47

File tree

3 files changed: +92 -9 lines changed

doc/whats_new.rst

Lines changed: 5 additions & 1 deletion
```diff
@@ -26,8 +26,12 @@ Enhancements
 ............
 
 
+- Add support for sample weights in scorer objects. Metrics with sample
+  weight support will automatically benefit from it.
+
+
 Documentation improvements
-...........................
+..........................
 
 
 Bug fixes
```
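As context for the changelog entry above, a minimal sketch (not part of this commit) of how the new keyword is used from caller code; the estimator setup is illustrative, and the `sklearn.cross_validation` import reflects the library layout at the time of this commit:

```python
# Illustrative sketch: calling a scorer with sample_weight. 'accuracy' is
# backed by accuracy_score, which accepts sample_weight, so the scorer
# forwards the weights to the metric automatically.
import numpy as np

from sklearn.cross_validation import train_test_split  # pre-model_selection layout
from sklearn.datasets import make_classification
from sklearn.metrics import SCORERS
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier().fit(X_train, y_train)

weights = np.ones_like(y_test, dtype=float)
weights[:10] = 0  # samples with zero weight should not affect the score

print(SCORERS['accuracy'](clf, X_test, y_test, sample_weight=weights))
```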

sklearn/metrics/scorer.py

Lines changed: 33 additions & 8 deletions
```diff
@@ -39,7 +39,7 @@ def __init__(self, score_func, sign, kwargs):
         self._sign = sign
 
     @abstractmethod
-    def __call__(self, estimator, X, y):
+    def __call__(self, estimator, X, y, sample_weight=None):
         pass
 
     def __repr__(self):
@@ -56,7 +56,7 @@ def _factory_args(self):
 
 
 class _PredictScorer(_BaseScorer):
-    def __call__(self, estimator, X, y_true):
+    def __call__(self, estimator, X, y_true, sample_weight=None):
         """Evaluate predicted target values for X relative to y_true.
 
         Parameters
@@ -71,17 +71,26 @@ def __call__(self, estimator, X, y_true):
         y_true : array-like
             Gold standard target values for X.
 
+        sample_weight : array-like, optional (default=None)
+            Sample weights.
+
         Returns
         -------
         score : float
             Score function applied to prediction of estimator on X.
         """
         y_pred = estimator.predict(X)
-        return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
-
+        if sample_weight is not None:
+            return self._sign * self._score_func(y_true, y_pred,
+                                                 sample_weight=sample_weight,
+                                                 **self._kwargs)
+        else:
+            return self._sign * self._score_func(y_true, y_pred,
+                                                 **self._kwargs)
+
 
 class _ProbaScorer(_BaseScorer):
-    def __call__(self, clf, X, y):
+    def __call__(self, clf, X, y, sample_weight=None):
         """Evaluate predicted probabilities for X relative to y_true.
 
         Parameters
@@ -97,20 +106,28 @@ def __call__(self, clf, X, y):
             Gold standard target values for X. These must be class labels,
             not probabilities.
 
+        sample_weight : array-like, optional (default=None)
+            Sample weights.
+
         Returns
         -------
         score : float
             Score function applied to prediction of estimator on X.
         """
         y_pred = clf.predict_proba(X)
-        return self._sign * self._score_func(y, y_pred, **self._kwargs)
+        if sample_weight is not None:
+            return self._sign * self._score_func(y, y_pred,
+                                                 sample_weight=sample_weight,
+                                                 **self._kwargs)
+        else:
+            return self._sign * self._score_func(y, y_pred, **self._kwargs)
 
     def _factory_args(self):
         return ", needs_proba=True"
 
 
 class _ThresholdScorer(_BaseScorer):
-    def __call__(self, clf, X, y):
+    def __call__(self, clf, X, y, sample_weight=None):
         """Evaluate decision function output for X relative to y_true.
 
         Parameters
@@ -128,6 +145,9 @@ def __call__(self, clf, X, y):
             Gold standard target values for X. These must be class labels,
             not decision function values.
 
+        sample_weight : array-like, optional (default=None)
+            Sample weights.
+
         Returns
         -------
         score : float
@@ -152,7 +172,12 @@ def __call__(self, clf, X, y):
         elif isinstance(y_pred, list):
             y_pred = np.vstack([p[:, -1] for p in y_pred]).T
 
-        return self._sign * self._score_func(y, y_pred, **self._kwargs)
+        if sample_weight is not None:
+            return self._sign * self._score_func(y, y_pred,
+                                                 sample_weight=sample_weight,
+                                                 **self._kwargs)
+        else:
+            return self._sign * self._score_func(y, y_pred, **self._kwargs)
 
     def _factory_args(self):
         return ", needs_threshold=True"
```

sklearn/metrics/tests/test_score_objects.py

Lines changed: 54 additions & 0 deletions
```diff
@@ -7,6 +7,8 @@
 from sklearn.utils.testing import assert_raises_regexp
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import ignore_warnings
+from sklearn.utils.testing import assert_equal
+from sklearn.utils.testing import assert_not_equal
 
 from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score,
                              log_loss)
@@ -15,16 +17,25 @@
 from sklearn.metrics import make_scorer, SCORERS
 from sklearn.svm import LinearSVC
 from sklearn.cluster import KMeans
+from sklearn.dummy import DummyRegressor
 from sklearn.linear_model import Ridge, LogisticRegression
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.datasets import make_blobs
+from sklearn.datasets import make_classification
 from sklearn.datasets import make_multilabel_classification
 from sklearn.datasets import load_diabetes
 from sklearn.cross_validation import train_test_split, cross_val_score
 from sklearn.grid_search import GridSearchCV
 from sklearn.multiclass import OneVsRestClassifier
 
 
+REGRESSION_SCORERS = ['r2', 'mean_absolute_error', 'mean_squared_error']
+CLF_SCORERS = ['accuracy', 'f1', 'roc_auc', 'average_precision', 'precision',
+               'recall', 'log_loss',
+               'adjusted_rand_score'  # not really, but works
+               ]
+
+
 class EstimatorWithoutFit(object):
     """Dummy estimator to test check_scoring"""
     pass
@@ -229,3 +240,46 @@ def test_raises_on_score_list():
     grid_search = GridSearchCV(clf, scoring=f1_scorer_no_average,
                                param_grid={'max_depth': [1, 2]})
     assert_raises(ValueError, grid_search.fit, X, y)
+
+
+def test_scorer_sample_weight():
+    """Test that scorers support sample_weight or raise sensible errors"""
+
+    # Unlike the metrics invariance test, in the scorer case it's harder
+    # to ensure that, on the classifier output, weighted and unweighted
+    # scores really should be unequal.
+    X, y = make_classification(random_state=0)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+    sample_weight = np.ones_like(y_test)
+    sample_weight[:10] = 0
+
+    # get sensible estimators for each metric
+    sensible_regr = DummyRegressor(strategy='median')
+    sensible_regr.fit(X_train, y_train)
+    sensible_clf = DecisionTreeClassifier()
+    sensible_clf.fit(X_train, y_train)
+    estimator = dict([(name, sensible_regr)
+                      for name in REGRESSION_SCORERS] +
+                     [(name, sensible_clf)
+                      for name in CLF_SCORERS])
+
+    for name, scorer in SCORERS.items():
+        try:
+            weighted = scorer(estimator[name], X_test, y_test,
+                              sample_weight=sample_weight)
+            ignored = scorer(estimator[name], X_test[10:], y_test[10:])
+            unweighted = scorer(estimator[name], X_test, y_test)
+            assert_not_equal(weighted, unweighted,
+                             "scorer {0} behaves identically when called with "
+                             "sample weights: {1} vs {2}".format(name,
+                                                                 weighted,
+                                                                 unweighted))
+            assert_equal(weighted, ignored,
+                         "scorer {0} behaves differently when ignoring "
+                         "samples and setting sample_weight to 0: "
+                         "{1} vs {2}".format(name, weighted, ignored))
+
+        except TypeError as e:
+            assert_true("sample_weight" in str(e),
+                        "scorer {0} raises unhelpful exception when called "
+                        "with sample weights: {1}".format(name, str(e)))
```
