Skip to content

Commit fd4ba4d

Browse files
committed
MAINT: set attributes as last action in DictVectorizer.fit
Prevents getting a half-initialized transformer when an exception occurs.
1 parent 800354b commit fd4ba4d

File tree

1 file changed

+8
-9
lines changed

1 file changed

+8
-9
lines changed

sklearn/feature_extraction/dict_vectorizer.py

Lines changed: 8 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -107,24 +107,23 @@ def fit(self, X, y=None):
107107
-------
108108
self
109109
"""
110-
# collect all the possible feature names
111-
self.feature_names_ = []
112-
self.vocabulary_ = {}
113-
114-
vocab = self.vocabulary_
110+
feature_names = []
111+
vocab = {}
115112

116113
for x in X:
117114
for f, v in six.iteritems(x):
118115
if isinstance(v, six.string_types):
119116
f = "%s%s%s" % (f, self.separator, v)
120117
if f not in vocab:
121-
self.feature_names_.append(f)
118+
feature_names.append(f)
122119
vocab[f] = len(vocab)
123120

124121
if self.sort:
125-
self.feature_names_.sort()
126-
self.vocabulary_ = dict((f, i) for i, f in
127-
enumerate(self.feature_names_))
122+
feature_names.sort()
123+
vocab = dict((f, i) for i, f in enumerate(feature_names))
124+
125+
self.feature_names_ = feature_names
126+
self.vocabulary_ = vocab
128127

129128
return self
130129

0 commit comments

Comments (0)