pwellner
diff --git a/‎doc/tutorial/basic/tutorial.rst‎
Lines changed: 2 additions & 2 deletions b/‎doc/tutorial/basic/tutorial.rst‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎sklearn/datasets/base.py‎
Lines changed: 44 additions & 20 deletions b/‎sklearn/datasets/base.py‎
Lines changed: 44 additions & 20 deletions
diff --git a/‎sklearn/datasets/tests/test_base.py‎
Lines changed: 5 additions & 0 deletions b/‎sklearn/datasets/tests/test_base.py‎
Lines changed: 5 additions & 0 deletions
@@ -136,7 +136,7 @@ learn::
     <sphx_glr_auto_examples_classification_plot_digits_classification.py>` illustrates how starting
     from the original problem one can shape the data for consumption in
     scikit-learn.
-    
+
 .. topic:: Loading from external datasets
 
     To load from an external dataset, please refer to :ref:`loading external datasets <external_datasets>`.
@@ -401,4 +401,4 @@ is similarly possible for an instance to be assigned multiple labels::
 In this case, the classifier is fit upon instances each assigned multiple labels.
 The :class:`MultiLabelBinarizer <sklearn.preprocessing.MultiLabelBinarizer>` is
 used to binarize the 2d array of multilabels to ``fit`` upon. As a result,
-``predict()`` returns a 2d array with multiple predicted labels for each instance.
+``predict()`` returns a 2d array with multiple predicted labels for each instance.
@@ -352,8 +352,9 @@ def load_iris(return_X_y=False):
         Dictionary-like object, the interesting attributes are:
         'data', the data to learn, 'target', the classification labels,
         'target_names', the meaning of the labels, 'feature_names', the
-        meaning of the features, and 'DESCR', the
-        full description of the dataset.
+        meaning of the features, 'DESCR', the full description of
+        the dataset, and 'filename', the physical location of
+        the iris csv dataset (added in version `0.20`).
 
     (data, target) : tuple if ``return_X_y`` is True
 
@@ -373,6 +374,7 @@ def load_iris(return_X_y=False):
     """
     module_path = dirname(__file__)
     data, target, target_names = load_data(module_path, 'iris.csv')
+    iris_csv_filename = join(module_path, 'data', 'iris.csv')
 
     with open(join(module_path, 'descr', 'iris.rst')) as rst_file:
         fdescr = rst_file.read()
@@ -384,7 +386,8 @@ def load_iris(return_X_y=False):
                  target_names=target_names,
                  DESCR=fdescr,
                  feature_names=['sepal length (cm)', 'sepal width (cm)',
-                                'petal length (cm)', 'petal width (cm)'])
+                                'petal length (cm)', 'petal width (cm)'],
+                 filename=iris_csv_filename)
 
 
 def load_breast_cancer(return_X_y=False):
@@ -415,8 +418,9 @@ def load_breast_cancer(return_X_y=False):
         Dictionary-like object, the interesting attributes are:
         'data', the data to learn, 'target', the classification labels,
         'target_names', the meaning of the labels, 'feature_names', the
-        meaning of the features, and 'DESCR', the
-        full description of the dataset.
+        meaning of the features, 'DESCR', the full description of
+        the dataset, and 'filename', the physical location of
+        the breast cancer csv dataset (added in version `0.20`).
 
     (data, target) : tuple if ``return_X_y`` is True
 
@@ -440,6 +444,7 @@ def load_breast_cancer(return_X_y=False):
     """
     module_path = dirname(__file__)
     data, target, target_names = load_data(module_path, 'breast_cancer.csv')
+    csv_filename = join(module_path, 'data', 'breast_cancer.csv')
 
     with open(join(module_path, 'descr', 'breast_cancer.rst')) as rst_file:
         fdescr = rst_file.read()
@@ -466,7 +471,8 @@ def load_breast_cancer(return_X_y=False):
     return Bunch(data=data, target=target,
                  target_names=target_names,
                  DESCR=fdescr,
-                 feature_names=feature_names)
+                 feature_names=feature_names,
+                 filename=csv_filename)
 
 
 def load_digits(n_class=10, return_X_y=False):
@@ -573,18 +579,21 @@ def load_diabetes(return_X_y=False):
     -------
     data : Bunch
         Dictionary-like object, the interesting attributes are:
-        'data', the data to learn and 'target', the regression target for each
-        sample.
+        'data', the data to learn, 'target', the regression target for each
+        sample, 'data_filename', the physical location
+        of diabetes data csv dataset, and 'target_filename', the physical
+        location of diabetes targets csv dataset (added in version `0.20`).
 
     (data, target) : tuple if ``return_X_y`` is True
 
         .. versionadded:: 0.18
     """
-
     module_path = dirname(__file__)
     base_dir = join(module_path, 'data')
-    data = np.loadtxt(join(base_dir, 'diabetes_data.csv.gz'))
-    target = np.loadtxt(join(base_dir, 'diabetes_target.csv.gz'))
+    data_filename = join(base_dir, 'diabetes_data.csv.gz')
+    data = np.loadtxt(data_filename)
+    target_filename = join(base_dir, 'diabetes_target.csv.gz')
+    target = np.loadtxt(target_filename)
 
     with open(join(module_path, 'descr', 'diabetes.rst')) as rst_file:
         fdescr = rst_file.read()
@@ -594,7 +603,9 @@ def load_diabetes(return_X_y=False):
 
     return Bunch(data=data, target=target, DESCR=fdescr,
                  feature_names=['age', 'sex', 'bmi', 'bp',
-                                's1', 's2', 's3', 's4', 's5', 's6'])
+                                's1', 's2', 's3', 's4', 's5', 's6'],
+                 data_filename=data_filename,
+                 target_filename=target_filename)
 
 
 def load_linnerud(return_X_y=False):
@@ -622,21 +633,29 @@ def load_linnerud(return_X_y=False):
         'targets', the two multivariate datasets, with 'data' corresponding to
         the exercise and 'targets' corresponding to the physiological
         measurements, as well as 'feature_names' and 'target_names'.
+        In addition, you will also have access to 'data_filename',
+        the physical location of linnerud data csv dataset, and
+        'target_filename', the physical location of
+        linnerud targets csv dataset (added in version `0.20`).
 
     (data, target) : tuple if ``return_X_y`` is True
 
         .. versionadded:: 0.18
     """
     base_dir = join(dirname(__file__), 'data/')
+    data_filename = join(base_dir, 'linnerud_exercise.csv')
+    target_filename = join(base_dir, 'linnerud_physiological.csv')
+
     # Read data
-    data_exercise = np.loadtxt(base_dir + 'linnerud_exercise.csv', skiprows=1)
-    data_physiological = np.loadtxt(base_dir + 'linnerud_physiological.csv',
-                                    skiprows=1)
+    data_exercise = np.loadtxt(data_filename, skiprows=1)
+    data_physiological = np.loadtxt(target_filename, skiprows=1)
+
     # Read header
-    with open(base_dir + 'linnerud_exercise.csv') as f:
+    with open(data_filename) as f:
         header_exercise = f.readline().split()
-    with open(base_dir + 'linnerud_physiological.csv') as f:
+    with open(target_filename) as f:
         header_physiological = f.readline().split()
+
     with open(dirname(__file__) + '/descr/linnerud.rst') as f:
         descr = f.read()
 
@@ -646,7 +665,9 @@ def load_linnerud(return_X_y=False):
     return Bunch(data=data_exercise, feature_names=header_exercise,
                  target=data_physiological,
                  target_names=header_physiological,
-                 DESCR=descr)
+                 DESCR=descr,
+                 data_filename=data_filename,
+                 target_filename=target_filename)
 
 
 def load_boston(return_X_y=False):
@@ -672,7 +693,9 @@ def load_boston(return_X_y=False):
     data : Bunch
         Dictionary-like object, the interesting attributes are:
         'data', the data to learn, 'target', the regression targets,
-        and 'DESCR', the full description of the dataset.
+        'DESCR', the full description of the dataset,
+        and 'filename', the physical location of boston
+        csv dataset (added in version `0.20`).
 
     (data, target) : tuple if ``return_X_y`` is True
 
@@ -713,7 +736,8 @@ def load_boston(return_X_y=False):
                  target=target,
                  # last column is target value
                  feature_names=feature_names[:-1],
-                 DESCR=descr_text)
+                 DESCR=descr_text,
+                 filename=data_file_name)
 
 
 def load_sample_images():
 
@@ -197,6 +197,8 @@ def test_load_linnerud():
     assert_equal(res.target.shape, (20, 3))
     assert_equal(len(res.target_names), 3)
     assert_true(res.DESCR)
+    assert_true(os.path.exists(res.data_filename))
+    assert_true(os.path.exists(res.target_filename))
 
     # test return_X_y option
     X_y_tuple = load_linnerud(return_X_y=True)
@@ -212,6 +214,7 @@ def test_load_iris():
     assert_equal(res.target.size, 150)
     assert_equal(res.target_names.size, 3)
     assert_true(res.DESCR)
+    assert_true(os.path.exists(res.filename))
 
     # test return_X_y option
     X_y_tuple = load_iris(return_X_y=True)
@@ -242,6 +245,7 @@ def test_load_breast_cancer():
     assert_equal(res.target.size, 569)
     assert_equal(res.target_names.size, 2)
     assert_true(res.DESCR)
+    assert_true(os.path.exists(res.filename))
 
     # test return_X_y option
     X_y_tuple = load_breast_cancer(return_X_y=True)
@@ -257,6 +261,7 @@ def test_load_boston():
     assert_equal(res.target.size, 506)
     assert_equal(res.feature_names.size, 13)
     assert_true(res.DESCR)
+    assert_true(os.path.exists(res.filename))
 
     # test return_X_y option
     X_y_tuple = load_boston(return_X_y=True)