1 file changed: +14 -1 lines changed
lines changed Original file line number Diff line number Diff line change 2828from numpy .testing import assert_array_equal
2929from numpy .testing import assert_raises
3030from sklearn .utils .testing import (assert_in , assert_less , assert_greater ,
31- assert_warns_message )
31+ assert_warns_message , assert_raise_message )
3232
3333from collections import defaultdict , Mapping
3434from functools import partial
@@ -868,6 +868,19 @@ def test_non_unique_vocab():
868868 assert_raises (ValueError , CountVectorizer , vocabulary = vocab )
869869
870870
def test_hashingvectorizer_nan_in_docs():
    """HashingVectorizer is expected to raise ValueError on an np.nan document.

    np.nan can end up among the documents when pandas is used to load text
    fields from a csv file that has missing values.
    """
    expected_message = (
        "np.nan is an invalid document, expected byte or unicode string.")
    docs_with_nan = ['hello world', np.nan, 'hello hello']

    def vectorize_docs():
        # The vectorizer is built inside the callable, so construction and
        # fit_transform both run only when the assertion helper calls it.
        vectorizer = HashingVectorizer()
        vectorizer.fit_transform(docs_with_nan)

    assert_raise_message(ValueError, expected_message, vectorize_docs)
882+
883+
871884def test_tfidfvectorizer_binary ():
872885 # Non-regression test: TfidfVectorizer used to ignore its "binary" param.
873886 v = TfidfVectorizer (binary = True , use_idf = False , norm = None )
You can’t perform that action at this time.
0 commit comments