Skip to content

Commit f46cd25

Browse files
committed
Merge pull request scikit-learn#4448 from pprett/fix-gbrt-min-leaf-weight
Fix gbrt min leaf weight
2 parents 59efcb5 + d0db91a commit f46cd25

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

sklearn/ensemble/gradient_boosting.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1002,12 +1002,19 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
10021002
n_inbag = max(1, int(self.subsample * n_samples))
10031003
loss_ = self.loss_
10041004

1005+
# Set min_weight_leaf from min_weight_fraction_leaf
1006+
if self.min_weight_fraction_leaf != 0. and sample_weight is not None:
1007+
min_weight_leaf = (self.min_weight_fraction_leaf *
1008+
np.sum(sample_weight))
1009+
else:
1010+
min_weight_leaf = 0.
1011+
10051012
# init criterion and splitter
10061013
criterion = FriedmanMSE(1)
10071014
splitter = PresortBestSplitter(criterion,
10081015
self.max_features_,
10091016
self.min_samples_leaf,
1010-
self.min_weight_fraction_leaf,
1017+
min_weight_leaf,
10111018
random_state)
10121019

10131020
if self.verbose:

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -976,10 +976,26 @@ def test_non_uniform_weights_toy_edge_case_reg():
976976
y = [0, 0, 1, 0]
977977
# ignore the first 2 training samples by setting their weight to 0
978978
sample_weight = [0, 0, 1, 1]
979-
for loss in ('ls', 'huber', 'lad', 'quantile'):
980-
gb = GradientBoostingRegressor(n_estimators=5)
979+
for loss in ('huber', 'ls', 'lad', 'quantile'):
980+
gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss)
981981
gb.fit(X, y, sample_weight=sample_weight)
982-
assert_true(gb.predict([[1, 0]])[0] > 0.5)
982+
assert_greater(gb.predict([[1, 0]])[0], 0.5)
983+
984+
985+
def test_non_uniform_weights_toy_min_weight_leaf():
986+
"""Regression test for https://github.com/scikit-learn/scikit-learn/issues/4447 """
987+
X = [[1, 0],
988+
[1, 0],
989+
[1, 0],
990+
[0, 1],
991+
]
992+
y = [0, 0, 1, 0]
993+
# ignore the first 2 training samples by setting their weight to 0
994+
sample_weight = [0, 0, 1, 1]
995+
gb = GradientBoostingRegressor(n_estimators=5, min_weight_fraction_leaf=0.1)
996+
gb.fit(X, y, sample_weight=sample_weight)
997+
assert_true(gb.predict([[1, 0]])[0] > 0.5)
998+
assert_almost_equal(gb.estimators_[0,0].splitter.min_weight_leaf, 0.2)
983 999

984 1000

985 1001
def test_non_uniform_weights_toy_edge_case_clf():

0 commit comments

Comments
 (0)