|
33 | 33 | "url = \"https://dl.dropbox.com/s/lnly9gw8pb1xhir/overfitting.zip\"\n", |
34 | 34 | "\n", |
35 | 35 | "\n", |
36 | | - "results = requests.get(url)\n", |
37 | | - "\n" |
| 36 | + "results = requests.get(url)" |
38 | 37 | ], |
39 | 38 | "language": "python", |
40 | 39 | "metadata": {}, |
|
47 | 46 | "input": [ |
48 | 47 | "import StringIO\n", |
49 | 48 | "z = zipfile.ZipFile(StringIO.StringIO(results.content))\n", |
50 | | - "#z.extractall()\n", |
51 | | - "\n" |
| 49 | + "# z.extractall()" |
52 | 50 | ], |
53 | 51 | "language": "python", |
54 | 52 | "metadata": {}, |
|
120 | 118 | "cell_type": "code", |
121 | 119 | "collapsed": false, |
122 | 120 | "input": [ |
123 | | - "M = np.fromstring(d.read(), sep=\",\" )" |
| 121 | + "M = np.fromstring(d.read(), sep=\",\")" |
124 | 122 | ], |
125 | 123 | "language": "python", |
126 | 124 | "metadata": {}, |
|
234 | 232 | "cell_type": "code", |
235 | 233 | "collapsed": false, |
236 | 234 | "input": [ |
237 | | - "ix_training = data[:,1] == 1\n", |
238 | | - "ix_testing = data[:,1] == 0\n", |
| 235 | + "ix_training = data[:, 1] == 1\n", |
| 236 | + "ix_testing = data[:, 1] == 0\n", |
239 | 237 | "\n", |
240 | | - "training_data = data[ ix_training, 5: ]\n", |
241 | | - "testing_data = data[ ix_testing, 5: ]\n", |
| 238 | + "training_data = data[ix_training, 5:]\n", |
| 239 | + "testing_data = data[ix_testing, 5:]\n", |
242 | 240 | "\n", |
243 | | - "training_labels = data[ ix_training, 2]\n", |
244 | | - "testing_labels = data[ ix_testing, 2]\n", |
| 241 | + "training_labels = data[ix_training, 2]\n", |
| 242 | + "testing_labels = data[ix_testing, 2]\n", |
245 | 243 | "\n", |
246 | 244 | "print \"training:\", training_data.shape, training_labels.shape\n", |
247 | 245 | "print \"testing: \", testing_data.shape, testing_labels.shape" |
|
280 | 278 | "cell_type": "code", |
281 | 279 | "collapsed": false, |
282 | 280 | "input": [ |
283 | | - "figsize( 12, 4 )" |
| 281 | + "figsize(12, 4)" |
284 | 282 | ], |
285 | 283 | "language": "python", |
286 | 284 | "metadata": {}, |
|
291 | 289 | "cell_type": "code", |
292 | 290 | "collapsed": false, |
293 | 291 | "input": [ |
294 | | - "hist( training_data.flatten() )\n", |
295 | | - "print training_data.shape[0]*training_data.shape[1]" |
| 292 | + "hist(training_data.flatten())\n", |
| 293 | + "print training_data.shape[0] * training_data.shape[1]" |
296 | 294 | ], |
297 | 295 | "language": "python", |
298 | 296 | "metadata": {}, |
|
324 | 322 | "input": [ |
325 | 323 | "import pymc as pm\n", |
326 | 324 | "\n", |
327 | | - "to_include = pm.Bernoulli( \"to_include\", 0.5, size= 200 )" |
| 325 | + "to_include = pm.Bernoulli(\"to_include\", 0.5, size=200)" |
328 | 326 | ], |
329 | 327 | "language": "python", |
330 | 328 | "metadata": {}, |
|
335 | 333 | "cell_type": "code", |
336 | 334 | "collapsed": false, |
337 | 335 | "input": [ |
338 | | - "coef = pm.Uniform( \"coefs\", 0, 1, size = 200 )" |
| 336 | + "coef = pm.Uniform(\"coefs\", 0, 1, size=200)" |
339 | 337 | ], |
340 | 338 | "language": "python", |
341 | 339 | "metadata": {}, |
|
347 | 345 | "collapsed": false, |
348 | 346 | "input": [ |
349 | 347 | "@pm.deterministic\n", |
350 | | - "def Z( coef = coef, to_include = to_include, data = training_data ):\n", |
351 | | - " ym = np.dot( to_include*training_data, coef )\n", |
| 348 | + "def Z(coef=coef, to_include=to_include, data=training_data):\n", |
| 349 | + " ym = np.dot(to_include * training_data, coef)\n", |
352 | 350 | " return ym - ym.mean()" |
353 | 351 | ], |
354 | 352 | "language": "python", |
|
361 | 359 | "collapsed": false, |
362 | 360 | "input": [ |
363 | 361 | "@pm.deterministic\n", |
364 | | - "def T( z = Z ):\n", |
365 | | - " return 0.45*(np.sign(z) + 1.1)" |
| 362 | + "def T(z=Z):\n", |
| 363 | + " return 0.45 * (np.sign(z) + 1.1)" |
366 | 364 | ], |
367 | 365 | "language": "python", |
368 | 366 | "metadata": {}, |
|
373 | 371 | "cell_type": "code", |
374 | 372 | "collapsed": false, |
375 | 373 | "input": [ |
376 | | - "obs = pm.Bernoulli( \"obs\", T, value = training_labels, observed = True)\n", |
| 374 | + "obs = pm.Bernoulli(\"obs\", T, value=training_labels, observed=True)\n", |
377 | 375 | "\n", |
378 | | - "model = pm.Model( [to_include, coef, Z, T, obs] )\n", |
379 | | - "map_ = pm.MAP( model )\n", |
| 376 | + "model = pm.Model([to_include, coef, Z, T, obs])\n", |
| 377 | + "map_ = pm.MAP(model)\n", |
380 | 378 | "map_.fit()" |
381 | 379 | ], |
382 | 380 | "language": "python", |
|
396 | 394 | "cell_type": "code", |
397 | 395 | "collapsed": false, |
398 | 396 | "input": [ |
399 | | - "mcmc = pm.MCMC( model )" |
| 397 | + "mcmc = pm.MCMC(model)" |
400 | 398 | ], |
401 | 399 | "language": "python", |
402 | 400 | "metadata": {}, |
|
407 | 405 | "cell_type": "code", |
408 | 406 | "collapsed": false, |
409 | 407 | "input": [ |
410 | | - "mcmc.sample(100000, 90000,1) " |
| 408 | + "mcmc.sample(100000, 90000, 1)" |
411 | 409 | ], |
412 | 410 | "language": "python", |
413 | 411 | "metadata": {}, |
|
434 | 432 | "cell_type": "code", |
435 | 433 | "collapsed": false, |
436 | 434 | "input": [ |
437 | | - "(np.round(T.value) == training_labels ).mean()" |
| 435 | + "(np.round(T.value) == training_labels).mean()" |
438 | 436 | ], |
439 | 437 | "language": "python", |
440 | 438 | "metadata": {}, |
|
454 | 452 | "collapsed": false, |
455 | 453 | "input": [ |
456 | 454 | "t_trace = mcmc.trace(\"T\")[:]\n", |
457 | | - "(np.round( t_trace[-500:-400,:]).mean(axis=0) == training_labels ).mean()" |
| 455 | + "(np.round(t_trace[-500:-400, :]).mean(axis=0) == training_labels).mean()" |
458 | 456 | ], |
459 | 457 | "language": "python", |
460 | 458 | "metadata": {}, |
|
473 | 471 | "cell_type": "code", |
474 | 472 | "collapsed": false, |
475 | 473 | "input": [ |
476 | | - "t_mean = np.round( t_trace).mean(axis=1)\n" |
| 474 | + "t_mean = np.round(t_trace).mean(axis=1)" |
477 | 475 | ], |
478 | 476 | "language": "python", |
479 | 477 | "metadata": {}, |
|
484 | 482 | "cell_type": "code", |
485 | 483 | "collapsed": false, |
486 | 484 | "input": [ |
487 | | - "imshow(t_trace[-10000:,:], aspect=\"auto\")\n", |
| 485 | + "imshow(t_trace[-10000:, :], aspect=\"auto\")\n", |
488 | 486 | "colorbar()" |
489 | 487 | ], |
490 | 488 | "language": "python", |
|
508 | 506 | "cell_type": "code", |
509 | 507 | "collapsed": false, |
510 | 508 | "input": [ |
511 | | - "figsize( 23, 8)\n", |
| 509 | + "figsize(23, 8)\n", |
512 | 510 | "coef_trace = mcmc.trace(\"coefs\")[:]\n", |
513 | | - "imshow(coef_trace[-10000:,:], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\")" |
| 511 | + "imshow(coef_trace[-10000:, :], aspect=\"auto\", cmap=pyplot.cm.RdBu, interpolation=\"none\")" |
514 | 512 | ], |
515 | 513 | "language": "python", |
516 | 514 | "metadata": {}, |
|
544 | 542 | "cell_type": "code", |
545 | 543 | "collapsed": false, |
546 | 544 | "input": [ |
547 | | - "figsize( 23, 8)\n", |
548 | | - "imshow(include_trace[-10000:,:], aspect=\"auto\", interpolation=\"none\")" |
| 545 | + "figsize(23, 8)\n", |
| 546 | + "imshow(include_trace[-10000:, :], aspect=\"auto\", interpolation=\"none\")" |
549 | 547 | ], |
550 | 548 | "language": "python", |
551 | 549 | "metadata": {}, |
|
0 commit comments