@@ -352,8 +352,9 @@ def load_iris(return_X_y=False):
352352 Dictionary-like object, the interesting attributes are:
353353 'data', the data to learn, 'target', the classification labels,
354354 'target_names', the meaning of the labels, 'feature_names', the
355- meaning of the features, and 'DESCR', the
356- full description of the dataset.
355+ meaning of the features, 'DESCR', the full description of
356+ the dataset, and 'filename', the physical location of
357+ iris csv dataset (added in version `0.20`).
357358
358359 (data, target) : tuple if ``return_X_y`` is True
359360
@@ -373,6 +374,7 @@ def load_iris(return_X_y=False):
373374 """
374375 module_path = dirname (__file__ )
375376 data , target , target_names = load_data (module_path , 'iris.csv' )
377+ iris_csv_filename = join (module_path , 'data' , 'iris.csv' )
376378
377379 with open (join (module_path , 'descr' , 'iris.rst' )) as rst_file :
378380 fdescr = rst_file .read ()
@@ -384,7 +386,8 @@ def load_iris(return_X_y=False):
384386 target_names = target_names ,
385387 DESCR = fdescr ,
386388 feature_names = ['sepal length (cm)' , 'sepal width (cm)' ,
387- 'petal length (cm)' , 'petal width (cm)' ])
389+ 'petal length (cm)' , 'petal width (cm)' ],
390+ filename = iris_csv_filename )
388391
389392
390393def load_breast_cancer (return_X_y = False ):
@@ -415,8 +418,9 @@ def load_breast_cancer(return_X_y=False):
415418 Dictionary-like object, the interesting attributes are:
416419 'data', the data to learn, 'target', the classification labels,
417420 'target_names', the meaning of the labels, 'feature_names', the
418- meaning of the features, and 'DESCR', the
419- full description of the dataset.
421+ meaning of the features, 'DESCR', the full description of
422+ the dataset, and 'filename', the physical location of
423+ breast cancer csv dataset (added in version `0.20`).
420424
421425 (data, target) : tuple if ``return_X_y`` is True
422426
@@ -440,6 +444,7 @@ def load_breast_cancer(return_X_y=False):
440444 """
441445 module_path = dirname (__file__ )
442446 data , target , target_names = load_data (module_path , 'breast_cancer.csv' )
447+ csv_filename = join (module_path , 'data' , 'breast_cancer.csv' )
443448
444449 with open (join (module_path , 'descr' , 'breast_cancer.rst' )) as rst_file :
445450 fdescr = rst_file .read ()
@@ -466,7 +471,8 @@ def load_breast_cancer(return_X_y=False):
466471 return Bunch (data = data , target = target ,
467472 target_names = target_names ,
468473 DESCR = fdescr ,
469- feature_names = feature_names )
474+ feature_names = feature_names ,
475+ filename = csv_filename )
470476
471477
472478def load_digits (n_class = 10 , return_X_y = False ):
@@ -573,18 +579,21 @@ def load_diabetes(return_X_y=False):
573579 -------
574580 data : Bunch
575581 Dictionary-like object, the interesting attributes are:
576- 'data', the data to learn and 'target', the regression target for each
577- sample.
582+ 'data', the data to learn, 'target', the regression target for each
583+ sample, 'data_filename', the physical location
584+ of diabetes data csv dataset, and 'target_filename', the physical
585+ location of diabetes targets csv dataset (added in version `0.20`).
578586
579587 (data, target) : tuple if ``return_X_y`` is True
580588
581589 .. versionadded:: 0.18
582590 """
583-
584591 module_path = dirname (__file__ )
585592 base_dir = join (module_path , 'data' )
586- data = np .loadtxt (join (base_dir , 'diabetes_data.csv.gz' ))
587- target = np .loadtxt (join (base_dir , 'diabetes_target.csv.gz' ))
593+ data_filename = join (base_dir , 'diabetes_data.csv.gz' )
594+ data = np .loadtxt (data_filename )
595+ target_filename = join (base_dir , 'diabetes_target.csv.gz' )
596+ target = np .loadtxt (target_filename )
588597
589598 with open (join (module_path , 'descr' , 'diabetes.rst' )) as rst_file :
590599 fdescr = rst_file .read ()
@@ -594,7 +603,9 @@ def load_diabetes(return_X_y=False):
594603
595604 return Bunch (data = data , target = target , DESCR = fdescr ,
596605 feature_names = ['age' , 'sex' , 'bmi' , 'bp' ,
597- 's1' , 's2' , 's3' , 's4' , 's5' , 's6' ])
606+ 's1' , 's2' , 's3' , 's4' , 's5' , 's6' ],
607+ data_filename = data_filename ,
608+ target_filename = target_filename )
598609
599610
600611def load_linnerud (return_X_y = False ):
@@ -622,21 +633,29 @@ def load_linnerud(return_X_y=False):
622633 'targets', the two multivariate datasets, with 'data' corresponding to
623634 the exercise and 'targets' corresponding to the physiological
624635 measurements, as well as 'feature_names' and 'target_names'.
636+ In addition, you will also have access to 'data_filename',
637+ the physical location of linnerud data csv dataset, and
638+ 'target_filename', the physical location of
639+ linnerud targets csv dataset (added in version `0.20`).
625640
626641 (data, target) : tuple if ``return_X_y`` is True
627642
628643 .. versionadded:: 0.18
629644 """
630645 base_dir = join (dirname (__file__ ), 'data/' )
646+ data_filename = join (base_dir , 'linnerud_exercise.csv' )
647+ target_filename = join (base_dir , 'linnerud_physiological.csv' )
648+
631649 # Read data
632- data_exercise = np .loadtxt (base_dir + 'linnerud_exercise.csv' , skiprows = 1 )
633- data_physiological = np .loadtxt (base_dir + 'linnerud_physiological.csv' ,
634- skiprows = 1 )
650+ data_exercise = np .loadtxt (data_filename , skiprows = 1 )
651+ data_physiological = np .loadtxt (target_filename , skiprows = 1 )
652+
635653 # Read header
636- with open (base_dir + 'linnerud_exercise.csv' ) as f :
654+ with open (data_filename ) as f :
637655 header_exercise = f .readline ().split ()
638- with open (base_dir + 'linnerud_physiological.csv' ) as f :
656+ with open (target_filename ) as f :
639657 header_physiological = f .readline ().split ()
658+
640659 with open (dirname (__file__ ) + '/descr/linnerud.rst' ) as f :
641660 descr = f .read ()
642661
@@ -646,7 +665,9 @@ def load_linnerud(return_X_y=False):
646665 return Bunch (data = data_exercise , feature_names = header_exercise ,
647666 target = data_physiological ,
648667 target_names = header_physiological ,
649- DESCR = descr )
668+ DESCR = descr ,
669+ data_filename = data_filename ,
670+ target_filename = target_filename )
650671
651672
652673def load_boston (return_X_y = False ):
@@ -672,7 +693,9 @@ def load_boston(return_X_y=False):
672693 data : Bunch
673694 Dictionary-like object, the interesting attributes are:
674695 'data', the data to learn, 'target', the regression targets,
675- and 'DESCR', the full description of the dataset.
696+ 'DESCR', the full description of the dataset,
697+ and 'filename', the physical location of boston
698+ csv dataset (added in version `0.20`).
676699
677700 (data, target) : tuple if ``return_X_y`` is True
678701
@@ -713,7 +736,8 @@ def load_boston(return_X_y=False):
713736 target = target ,
714737 # last column is target value
715738 feature_names = feature_names [:- 1 ],
716- DESCR = descr_text )
739+ DESCR = descr_text ,
740+ filename = data_file_name )
717741
718742
719743def load_sample_images ():
0 commit comments