| 
33 | 33 |       "url = \"https://dl.dropbox.com/s/lnly9gw8pb1xhir/overfitting.zip\"\n",  | 
34 | 34 |       "\n",  | 
35 | 35 |       "\n",  | 
36 |  | -      "results = requests.get(url);"  | 
 | 36 | +      "results = requests.get(url)"  | 
37 | 37 |      ],  | 
38 | 38 |      "language": "python",  | 
39 | 39 |      "metadata": {},  | 
 | 
46 | 46 |      "input": [  | 
47 | 47 |       "import StringIO\n",  | 
48 | 48 |       "z = zipfile.ZipFile(StringIO.StringIO(results.content))\n",  | 
49 |  | -      "# z.extractall();;"  | 
 | 49 | +      "# z.extractall()"  | 
50 | 50 |      ],  | 
51 | 51 |      "language": "python",  | 
52 | 52 |      "metadata": {},  | 
 | 
57 | 57 |      "cell_type": "code",  | 
58 | 58 |      "collapsed": false,  | 
59 | 59 |      "input": [  | 
60 |  | -      "z.extractall();"  | 
 | 60 | +      "z.extractall()"  | 
61 | 61 |      ],  | 
62 | 62 |      "language": "python",  | 
63 | 63 |      "metadata": {},  | 
 | 
68 | 68 |      "cell_type": "code",  | 
69 | 69 |      "collapsed": false,  | 
70 | 70 |      "input": [  | 
71 |  | -      "z.namelist();"  | 
 | 71 | +      "z.namelist()"  | 
72 | 72 |      ],  | 
73 | 73 |      "language": "python",  | 
74 | 74 |      "metadata": {},  | 
 | 
88 | 88 |      "collapsed": false,  | 
89 | 89 |      "input": [  | 
90 | 90 |       "d = z.open('overfitting.csv')\n",  | 
91 |  | -      "d.readline();"  | 
 | 91 | +      "d.readline()"  | 
92 | 92 |      ],  | 
93 | 93 |      "language": "python",  | 
94 | 94 |      "metadata": {},  | 
 | 
107 | 107 |      "cell_type": "code",  | 
108 | 108 |      "collapsed": false,  | 
109 | 109 |      "input": [  | 
110 |  | -      "import numpy as np;"  | 
 | 110 | +      "import numpy as np"  | 
111 | 111 |      ],  | 
112 | 112 |      "language": "python",  | 
113 | 113 |      "metadata": {},  | 
 | 
118 | 118 |      "cell_type": "code",  | 
119 | 119 |      "collapsed": false,  | 
120 | 120 |      "input": [  | 
121 |  | -      "M = np.fromstring(d.read(), sep=\",\");"  | 
 | 121 | +      "M = np.fromstring(d.read(), sep=\",\")"  | 
122 | 122 |      ],  | 
123 | 123 |      "language": "python",  | 
124 | 124 |      "metadata": {},  | 
 | 
129 | 129 |      "cell_type": "code",  | 
130 | 130 |      "collapsed": false,  | 
131 | 131 |      "input": [  | 
132 |  | -      "len(d.read());"  | 
 | 132 | +      "len(d.read())"  | 
133 | 133 |      ],  | 
134 | 134 |      "language": "python",  | 
135 | 135 |      "metadata": {},  | 
 | 
167 | 167 |      "cell_type": "code",  | 
168 | 168 |      "collapsed": false,  | 
169 | 169 |      "input": [  | 
170 |  | -      "data = np.loadtxt(\"overfitting.csv\", delimiter=\",\", skiprows=1);"  | 
 | 170 | +      "data = np.loadtxt(\"overfitting.csv\", delimiter=\",\", skiprows=1)"  | 
171 | 171 |      ],  | 
172 | 172 |      "language": "python",  | 
173 | 173 |      "metadata": {},  | 
 | 
193 | 193 |       "\n",  | 
194 | 194 |       "\"\"\"\n",  | 
195 | 195 |       "\n",  | 
196 |  | -      "data.shape;"  | 
 | 196 | +      "data.shape"  | 
197 | 197 |      ],  | 
198 | 198 |      "language": "python",  | 
199 | 199 |      "metadata": {},  | 
 | 
242 | 242 |       "testing_labels = data[ix_testing, 2]\n",  | 
243 | 243 |       "\n",  | 
244 | 244 |       "print \"training:\", training_data.shape, training_labels.shape\n",  | 
245 |  | -      "print \"testing: \", testing_data.shape, testing_labels.shape;"  | 
 | 245 | +      "print \"testing: \", testing_data.shape, testing_labels.shape"  | 
246 | 246 |      ],  | 
247 | 247 |      "language": "python",  | 
248 | 248 |      "metadata": {},  | 
 | 
278 | 278 |      "cell_type": "code",  | 
279 | 279 |      "collapsed": false,  | 
280 | 280 |      "input": [  | 
281 |  | -      "figsize(12, 4);"  | 
 | 281 | +      "figsize(12, 4)"  | 
282 | 282 |      ],  | 
283 | 283 |      "language": "python",  | 
284 | 284 |      "metadata": {},  | 
 | 
290 | 290 |      "collapsed": false,  | 
291 | 291 |      "input": [  | 
292 | 292 |       "hist(training_data.flatten())\n",  | 
293 |  | -      "print training_data.shape[0] * training_data.shape[1];"  | 
 | 293 | +      "print training_data.shape[0] * training_data.shape[1]"  | 
294 | 294 |      ],  | 
295 | 295 |      "language": "python",  | 
296 | 296 |      "metadata": {},  | 
 | 
322 | 322 |      "input": [  | 
323 | 323 |       "import pymc as pm\n",  | 
324 | 324 |       "\n",  | 
325 |  | -      "to_include = pm.Bernoulli(\"to_include\", 0.5, size=200);"  | 
 | 325 | +      "to_include = pm.Bernoulli(\"to_include\", 0.5, size=200)"  | 
326 | 326 |      ],  | 
327 | 327 |      "language": "python",  | 
328 | 328 |      "metadata": {},  | 
 | 
333 | 333 |      "cell_type": "code",  | 
334 | 334 |      "collapsed": false,  | 
335 | 335 |      "input": [  | 
336 |  | -      "coef = pm.Uniform(\"coefs\", 0, 1, size=200);"  | 
 | 336 | +      "coef = pm.Uniform(\"coefs\", 0, 1, size=200)"  | 
337 | 337 |      ],  | 
338 | 338 |      "language": "python",  | 
339 | 339 |      "metadata": {},  | 
 | 
347 | 347 |       "@pm.deterministic\n",  | 
348 | 348 |       "def Z(coef=coef, to_include=to_include, data=training_data):\n",  | 
349 | 349 |       "    ym = np.dot(to_include * training_data, coef)\n",  | 
350 |  | -      "    return ym - ym.mean();"  | 
 | 350 | +      "    return ym - ym.mean()"  | 
351 | 351 |      ],  | 
352 | 352 |      "language": "python",  | 
353 | 353 |      "metadata": {},  | 
 | 
360 | 360 |      "input": [  | 
361 | 361 |       "@pm.deterministic\n",  | 
362 | 362 |       "def T(z=Z):\n",  | 
363 |  | -      "    return 0.45 * (np.sign(z) + 1.1);"  | 
 | 363 | +      "    return 0.45 * (np.sign(z) + 1.1)"  | 
364 | 364 |      ],  | 
365 | 365 |      "language": "python",  | 
366 | 366 |      "metadata": {},  | 
 | 
375 | 375 |       "\n",  | 
376 | 376 |       "model = pm.Model([to_include, coef, Z, T, obs])\n",  | 
377 | 377 |       "map_ = pm.MAP(model)\n",  | 
378 |  | -      "map_.fit();"  | 
 | 378 | +      "map_.fit()"  | 
379 | 379 |      ],  | 
380 | 380 |      "language": "python",  | 
381 | 381 |      "metadata": {},  | 
 | 
394 | 394 |      "cell_type": "code",  | 
395 | 395 |      "collapsed": false,  | 
396 | 396 |      "input": [  | 
397 |  | -      "mcmc = pm.MCMC(model);"  | 
 | 397 | +      "mcmc = pm.MCMC(model)"  | 
398 | 398 |      ],  | 
399 | 399 |      "language": "python",  | 
400 | 400 |      "metadata": {},  | 
 | 
405 | 405 |      "cell_type": "code",  | 
406 | 406 |      "collapsed": false,  | 
407 | 407 |      "input": [  | 
408 |  | -      "mcmc.sample(100000, 90000, 1);"  | 
 | 408 | +      "mcmc.sample(100000, 90000, 1)"  | 
409 | 409 |      ],  | 
410 | 410 |      "language": "python",  | 
411 | 411 |      "metadata": {},  | 
 | 
432 | 432 |      "cell_type": "code",  | 
433 | 433 |      "collapsed": false,  | 
434 | 434 |      "input": [  | 
435 |  | -      "(np.round(T.value) == training_labels).mean();"  | 
 | 435 | +      "(np.round(T.value) == training_labels).mean()"  | 
436 | 436 |      ],  | 
437 | 437 |      "language": "python",  | 
438 | 438 |      "metadata": {},  | 
 | 
452 | 452 |      "collapsed": false,  | 
453 | 453 |      "input": [  | 
454 | 454 |       "t_trace = mcmc.trace(\"T\")[:]\n",  | 
455 |  | -      "(np.round(t_trace[-500:-400, :]).mean(axis=0) == training_labels).mean();"  | 
 | 455 | +      "(np.round(t_trace[-500:-400, :]).mean(axis=0) == training_labels).mean()"  | 
456 | 456 |      ],  | 
457 | 457 |      "language": "python",  | 
458 | 458 |      "metadata": {},  | 
 | 
471 | 471 |      "cell_type": "code",  | 
472 | 472 |      "collapsed": false,  | 
473 | 473 |      "input": [  | 
474 |  | -      "t_mean = np.round(t_trace).mean(axis=1);"  | 
 | 474 | +      "t_mean = np.round(t_trace).mean(axis=1)"  | 
475 | 475 |      ],  | 
476 | 476 |      "language": "python",  | 
477 | 477 |      "metadata": {},  | 
 | 
483 | 483 |      "collapsed": false,  | 
484 | 484 |      "input": [  | 
485 | 485 |       "imshow(t_trace[-10000:, :], aspect=\"auto\")\n",  | 
486 |  | -      "colorbar();"  | 
 | 486 | +      "colorbar()"  | 
487 | 487 |      ],  | 
488 | 488 |      "language": "python",  | 
489 | 489 |      "metadata": {},  | 
 | 
508 | 508 |      "input": [  | 
509 | 509 |       "figsize(23, 8)\n",  | 
510 | 510 |       "coef_trace = mcmc.trace(\"coefs\")[:]\n",  | 
511 |  | -      "imshow(coef_trace[-10000:, :], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\");"  | 
 | 511 | +      "imshow(coef_trace[-10000:, :], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\")"  | 
512 | 512 |      ],  | 
513 | 513 |      "language": "python",  | 
514 | 514 |      "metadata": {},  | 
 | 
531 | 531 |      "cell_type": "code",  | 
532 | 532 |      "collapsed": false,  | 
533 | 533 |      "input": [  | 
534 |  | -      "include_trace = mcmc.trace(\"to_include\")[:];"  | 
 | 534 | +      "include_trace = mcmc.trace(\"to_include\")[:]"  | 
535 | 535 |      ],  | 
536 | 536 |      "language": "python",  | 
537 | 537 |      "metadata": {},  | 
 | 
543 | 543 |      "collapsed": false,  | 
544 | 544 |      "input": [  | 
545 | 545 |       "figsize(23, 8)\n",  | 
546 |  | -      "imshow(include_trace[-10000:, :], aspect=\"auto\", interpolation=\"none\");"  | 
 | 546 | +      "imshow(include_trace[-10000:, :], aspect=\"auto\", interpolation=\"none\")"  | 
547 | 547 |      ],  | 
548 | 548 |      "language": "python",  | 
549 | 549 |      "metadata": {},  | 
 | 
0 commit comments