|
1 | 1 | {
|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 |
| - "cell_type": "markdown", |
5 |
| - "metadata": {}, |
6 |
| - "source": [ |
7 |
| - "Logistic Regression - Overview\n", |
8 |
| - "===========\n", |
9 |
| - "***\n", |
10 |
| - "\n", |
11 |
| - "### What are the odds that an event will happen? Answering yes/no questions.\n", |
12 |
| - "\n" |
13 |
| - ] |
14 |
| - }, |
15 |
| - { |
16 |
| - "cell_type": "markdown", |
17 |
| - "metadata": {}, |
18 |
| - "source": [ |
19 |
| - "<img src=\"files/images/b1fig1_nfloutcomes.png\" />" |
20 |
| - ] |
21 |
| - }, |
22 |
| - { |
23 |
| - "cell_type": "markdown", |
24 |
| - "metadata": {}, |
25 |
| - "source": [ |
26 |
| - "<img src=\"files/images/b1fig2_nfloutcomes_withline.png\" />" |
27 |
| - ] |
28 |
| - }, |
29 |
| - { |
30 |
| - "cell_type": "markdown", |
31 |
| - "metadata": {}, |
32 |
| - "source": [ |
33 |
| - "\n", |
34 |
| - "<img src=\"files/images/standardSigmoidFunction.png\" />\n", |
35 |
| - "\n" |
36 |
| - ] |
37 |
| - }, |
38 |
| - { |
39 |
| - "cell_type": "markdown", |
40 |
| - "metadata": {}, |
41 |
| - "source": [ |
42 |
| - "\n", |
43 |
| - "\n", |
44 |
| - "\n" |
45 |
| - ] |
46 |
| - }, |
47 |
| - { |
48 |
| - "cell_type": "markdown", |
49 |
| - "metadata": {}, |
50 |
| - "source": [ |
51 |
| - "A function that has the above shape is:\n", |
52 |
| - "\n", |
53 |
| - "\n", |
54 |
| - "$$P(x) = \\frac{1}{1 + e^{b_0 + b_1x}}$$\n", |
55 |
| - "\n", |
56 |
| - "---\n", |
57 |
| - "where P(x) is the probability of a score of x leading to a win. \n", |
58 |
| - "$b_0, b_1$ are parameters that we will estimate, so the curve fits our data.\n", |
59 |
| - "\n", |
60 |
| - "\n", |
61 |
| - "\n", |
62 |
| - "\n" |
63 |
| - ] |
64 |
| - }, |
65 |
| - { |
66 |
| - "cell_type": "markdown", |
67 |
| - "metadata": {}, |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 1, |
| 6 | + "metadata": { |
| 7 | + "collapsed": false |
| 8 | + }, |
| 9 | + "outputs": [ |
| 10 | + { |
| 11 | + "name": "stdout", |
| 12 | + "output_type": "stream", |
| 13 | + "text": [ |
| 14 | + "Populating the interactive namespace from numpy and matplotlib\n", |
| 15 | + "Coefficients: [ 72.88279832 -0.08844242]\n", |
| 16 | + "Intercept: 0.000210747768548\n", |
| 17 | + "P-Values: [ 0.00000000e+000 0.00000000e+000 5.96972978e-203]\n", |
| 18 | + "R-Squared: 0.656632624649\n" |
| 19 | + ] |
| 20 | + } |
| 21 | + ], |
68 | 22 | "source": [
|
69 |
| - "---\n", |
70 |
| - "\n", |
71 |
| - "\n", |
72 |
| - "\n", |
73 |
| - "\n", |
74 |
| - "\n", |
75 |
| - "\n", |
76 |
| - "\n", |
77 |
| - "\n", |
78 |
| - "\n", |
79 |
| - "\n", |
80 |
| - "---\n", |
81 |
| - "\n", |
82 |
| - "\n", |
83 |
| - "\n", |
84 |
| - "\n", |
85 |
| - "\n", |
86 |
| - "\n", |
87 |
| - "\n", |
88 |
| - "\n", |
89 |
| - "\n", |
90 |
| - "\n", |
91 |
| - "\n", |
92 |
| - "\n", |
93 |
| - "\n", |
94 |
| - "\n", |
95 |
| - "\n" |
| 23 | + "%pylab inline\n", |
| 24 | + "import pylab as pl\n", |
| 25 | + "import numpy as np\n", |
| 26 | + "#from sklearn import datasets, linear_model\n", |
| 27 | + "import pandas as pd\n", |
| 28 | + "import statsmodels.api as sm\n", |
| 29 | + "\n", |
| 30 | + "# import the cleaned up dataset\n", |
| 31 | + "df = pd.read_csv('../datasets/loanf.csv')\n", |
| 32 | + "\n", |
| 33 | + "intrate = df['Interest.Rate']\n", |
| 34 | + "loanamt = df['Loan.Amount']\n", |
| 35 | + "fico = df['FICO.Score']\n", |
| 36 | + "\n", |
| 37 | + "# reshape the data from a pandas Series to columns \n", |
| 38 | + "# the dependent variable\n", |
| 39 | + "y = np.matrix(intrate).transpose()\n", |
| 40 | + "# the independent variables shaped as columns\n", |
| 41 | + "x1 = np.matrix(fico).transpose()\n", |
| 42 | + "x2 = np.matrix(loanamt).transpose()\n", |
| 43 | + "\n", |
| 44 | + "# put the two columns together to create an input matrix \n", |
| 45 | + "# if we had n independent variables we would have n columns here\n", |
| 46 | + "x = np.column_stack([x1,x2])\n", |
| 47 | + "\n", |
| 48 | + "# create a linear model and fit it to the data\n", |
| 49 | + "X = sm.add_constant(x)\n", |
| 50 | + "model = sm.OLS(y,X)\n", |
| 51 | + "f = model.fit()\n", |
| 52 | + "\n", |
| 53 | + "print 'Coefficients: ', f.params[0:2]\n", |
| 54 | + "print 'Intercept: ', f.params[2]\n", |
| 55 | + "print 'P-Values: ', f.pvalues\n", |
| 56 | + "print 'R-Squared: ', f.rsquared\n" |
96 | 57 | ]
|
97 | 58 | },
|
98 | 59 | {
|
99 | 60 | "cell_type": "code",
|
100 |
| - "execution_count": 3, |
| 61 | + "execution_count": 1, |
101 | 62 | "metadata": {
|
102 | 63 | "collapsed": false
|
103 | 64 | },
|
|
167 | 128 | "</script>"
|
168 | 129 | ],
|
169 | 130 | "text/plain": [
|
170 |
| - "<IPython.core.display.HTML at 0x109391790>" |
| 131 | + "<IPython.core.display.HTML at 0x10931ba90>" |
171 | 132 | ]
|
172 | 133 | },
|
173 |
| - "execution_count": 3, |
| 134 | + "execution_count": 1, |
174 | 135 | "metadata": {},
|
175 | 136 | "output_type": "execute_result"
|
176 | 137 | }
|
177 | 138 | ],
|
178 | 139 | "source": [
|
179 |
| - "\n", |
180 |
| - "\n", |
181 |
| - "\n", |
182 |
| - "\n", |
183 |
| - "\n", |
184 |
| - "\n", |
185 |
| - "\n", |
186 |
| - "\n", |
187 |
| - "\n", |
188 |
| - "\n", |
189 |
| - "\n", |
190 |
| - "\n", |
191 | 140 | "from IPython.core.display import HTML\n",
|
192 | 141 | "def css_styling():\n",
|
193 | 142 | " styles = open(\"../styles/custom.css\", \"r\").read()\n",
|
|
197 | 146 | },
|
198 | 147 | {
|
199 | 148 | "cell_type": "code",
|
200 |
| - "execution_count": 3, |
| 149 | + "execution_count": null, |
201 | 150 | "metadata": {
|
202 | 151 | "collapsed": false
|
203 | 152 | },
|
|
0 commit comments