Skip to content

Commit 20a56c9

Browse files
authored
Merge pull request #48 from dspyrhsu/master
Show effects and no deprecation
2 parents 3f5c7bf + 6a05f74 commit 20a56c9

File tree

2 files changed

+30
-70
lines changed

2 files changed

+30
-70
lines changed

notebooks/chapter08_ml/01_scikit.ipynb

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@
3131
{
3232
"cell_type": "code",
3333
"execution_count": null,
34-
"metadata": {
35-
"collapsed": false
36-
},
34+
"metadata": {},
3735
"outputs": [],
3836
"source": [
3937
"import numpy as np\n",
@@ -53,9 +51,7 @@
5351
{
5452
"cell_type": "code",
5553
"execution_count": null,
56-
"metadata": {
57-
"collapsed": false
58-
},
54+
"metadata": {},
5955
"outputs": [],
6056
"source": [
6157
"f = lambda x: np.exp(3 * x)"
@@ -71,9 +67,7 @@
7167
{
7268
"cell_type": "code",
7369
"execution_count": null,
74-
"metadata": {
75-
"collapsed": false
76-
},
70+
"metadata": {},
7771
"outputs": [],
7872
"source": [
7973
"x_tr = np.linspace(0., 2, 200)\n",
@@ -84,19 +78,18 @@
8478
"cell_type": "markdown",
8579
"metadata": {},
8680
"source": [
87-
"4. Now, let's generate our data points within $[0, 1]$. We use the function $f$ and we add some Gaussian noise."
81+
"4. Now, let's generate our data points within $[0, 1]$. We use the function $f$ and we add some Gaussian noise. In order to be able to demonstrate some effects, we use one specific set of data points generated in this fashion."
8882
]
8983
},
9084
{
9185
"cell_type": "code",
9286
"execution_count": null,
93-
"metadata": {
94-
"collapsed": false
95-
},
87+
"metadata": {},
9688
"outputs": [],
9789
"source": [
9890
"x = np.array([0, .1, .2, .5, .8, .9, 1])\n",
99-
"y = f(x) + np.random.randn(len(x))"
91+
"# y = f(x) + np.random.randn(len(x))\n",
92+
"y = np.array([0.59837698, 2.90450025, 4.73684354, 3.87158063, 11.77734608, 15.51112358, 20.08663964])"
10093
]
10194
},
10295
{
@@ -109,9 +102,7 @@
109102
{
110103
"cell_type": "code",
111104
"execution_count": null,
112-
"metadata": {
113-
"collapsed": false
114-
},
105+
"metadata": {},
115106
"outputs": [],
116107
"source": [
117108
"plt.figure(figsize=(6,3));\n",
@@ -129,9 +120,7 @@
129120
{
130121
"cell_type": "code",
131122
"execution_count": null,
132-
"metadata": {
133-
"collapsed": false
134-
},
123+
"metadata": {},
135124
"outputs": [],
136125
"source": [
137126
"# We create the model.\n",
@@ -161,9 +150,7 @@
161150
{
162151
"cell_type": "code",
163152
"execution_count": null,
164-
"metadata": {
165-
"collapsed": false
166-
},
153+
"metadata": {},
167154
"outputs": [],
168155
"source": [
169156
"plt.figure(figsize=(6,3));\n",
@@ -185,9 +172,7 @@
185172
{
186173
"cell_type": "code",
187174
"execution_count": null,
188-
"metadata": {
189-
"collapsed": false
190-
},
175+
"metadata": {},
191176
"outputs": [],
192177
"source": [
193178
"lrp = lm.LinearRegression()\n",
@@ -231,9 +216,7 @@
231216
{
232217
"cell_type": "code",
233218
"execution_count": null,
234-
"metadata": {
235-
"collapsed": false
236-
},
219+
"metadata": {},
237220
"outputs": [],
238221
"source": [
239222
"ridge = lm.RidgeCV()\n",
@@ -287,9 +270,9 @@
287270
"name": "python",
288271
"nbconvert_exporter": "python",
289272
"pygments_lexer": "ipython3",
290-
"version": "3.4.2"
273+
"version": "3.6.0"
291274
}
292275
},
293276
"nbformat": 4,
294-
"nbformat_minor": 0
277+
"nbformat_minor": 1
295278
}

notebooks/chapter08_ml/04_text.ipynb

Lines changed: 16 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,13 @@
4040
{
4141
"cell_type": "code",
4242
"execution_count": null,
43-
"metadata": {
44-
"collapsed": false
45-
},
43+
"metadata": {},
4644
"outputs": [],
4745
"source": [
4846
"import numpy as np\n",
4947
"import pandas as pd\n",
5048
"import sklearn\n",
51-
"import sklearn.cross_validation as cv\n",
52-
"import sklearn.grid_search as gs\n",
49+
"import sklearn.model_selection as ms\n",
5350
"import sklearn.feature_extraction.text as text\n",
5451
"import sklearn.naive_bayes as nb\n",
5552
"import matplotlib.pyplot as plt\n",
@@ -66,9 +63,7 @@
6663
{
6764
"cell_type": "code",
6865
"execution_count": null,
69-
"metadata": {
70-
"collapsed": false
71-
},
66+
"metadata": {},
7267
"outputs": [],
7368
"source": [
7469
"df = pd.read_csv(\"data/troll.csv\")"
@@ -84,9 +79,7 @@
8479
{
8580
"cell_type": "code",
8681
"execution_count": null,
87-
"metadata": {
88-
"collapsed": false
89-
},
82+
"metadata": {},
9083
"outputs": [],
9184
"source": [
9285
"df[['Insult', 'Comment']].tail()"
@@ -102,9 +95,7 @@
10295
{
10396
"cell_type": "code",
10497
"execution_count": null,
105-
"metadata": {
106-
"collapsed": false
107-
},
98+
"metadata": {},
10899
"outputs": [],
109100
"source": [
110101
"y = df['Insult']"
@@ -120,9 +111,7 @@
120111
{
121112
"cell_type": "code",
122113
"execution_count": null,
123-
"metadata": {
124-
"collapsed": false
125-
},
114+
"metadata": {},
126115
"outputs": [],
127116
"source": [
128117
"tf = text.TfidfVectorizer()\n",
@@ -140,9 +129,7 @@
140129
{
141130
"cell_type": "code",
142131
"execution_count": null,
143-
"metadata": {
144-
"collapsed": false
145-
},
132+
"metadata": {},
146133
"outputs": [],
147134
"source": [
148135
"print(\"Each sample has ~{0:.2f}% non-zero features.\".format(\n",
@@ -159,13 +146,11 @@
159146
{
160147
"cell_type": "code",
161148
"execution_count": null,
162-
"metadata": {
163-
"collapsed": false
164-
},
149+
"metadata": {},
165150
"outputs": [],
166151
"source": [
167152
"(X_train, X_test,\n",
168-
" y_train, y_test) = cv.train_test_split(X, y,\n",
153+
" y_train, y_test) = ms.train_test_split(X, y,\n",
169154
" test_size=.2)"
170155
]
171156
},
@@ -179,12 +164,10 @@
179164
{
180165
"cell_type": "code",
181166
"execution_count": null,
182-
"metadata": {
183-
"collapsed": false
184-
},
167+
"metadata": {},
185168
"outputs": [],
186169
"source": [
187-
"bnb = gs.GridSearchCV(nb.BernoulliNB(), param_grid={'alpha':np.logspace(-2., 2., 50)})\n",
170+
"bnb = ms.GridSearchCV(nb.BernoulliNB(), param_grid={'alpha':np.logspace(-2., 2., 50)})\n",
188171
"bnb.fit(X_train, y_train);"
189172
]
190173
},
@@ -198,9 +181,7 @@
198181
{
199182
"cell_type": "code",
200183
"execution_count": null,
201-
"metadata": {
202-
"collapsed": false
203-
},
184+
"metadata": {},
204185
"outputs": [],
205186
"source": [
206187
"bnb.score(X_test, y_test)"
@@ -216,9 +197,7 @@
216197
{
217198
"cell_type": "code",
218199
"execution_count": null,
219-
"metadata": {
220-
"collapsed": false
221-
},
200+
"metadata": {},
222201
"outputs": [],
223202
"source": [
224203
"# We first get the words corresponding to each feature.\n",
@@ -239,9 +218,7 @@
239218
{
240219
"cell_type": "code",
241220
"execution_count": null,
242-
"metadata": {
243-
"collapsed": false
244-
},
221+
"metadata": {},
245222
"outputs": [],
246223
"source": [
247224
"print(bnb.predict(tf.transform([\n",
@@ -277,9 +254,9 @@
277254
"name": "python",
278255
"nbconvert_exporter": "python",
279256
"pygments_lexer": "ipython3",
280-
"version": "3.4.2"
257+
"version": "3.6.0"
281258
}
282259
},
283260
"nbformat": 4,
284-
"nbformat_minor": 0
261+
"nbformat_minor": 1
285262
}

0 commit comments

Comments (0)