|
254 | 254 | "\n", |
255 | 255 | "@pm.potential\n", |
256 | 256 | "def error(true_price=true_price, price_estimate=price_estimate):\n", |
257 | | - " return pm.normal_like(true_price, price_estimate, 1 / (3e3) ** 2)\n", |
| 257 | + " return pm.normal_like(true_price, price_estimate, 1 / (3e3) ** 2)\n", |
258 | 258 | "\n", |
259 | 259 | "\n", |
260 | 260 | "mcmc = pm.MCMC([true_price, prize_1, prize_2, price_estimate, error])\n", |
261 | 261 | "mcmc.sample(50000, 10000)\n", |
262 | 262 | "\n", |
263 | | - "price_trace = mcmc.trace(\"true_price\")[:];" |
| 263 | + "price_trace = mcmc.trace(\"true_price\")[:]" |
264 | 264 | ], |
265 | 265 | "language": "python", |
266 | 266 | "metadata": {}, |
|
299 | 299 | "plt.title(\"Posterior of the true price estimate\")\n", |
300 | 300 | "plt.vlines(mu_prior, 0, 1.1 * np.max(_hist[0]), label=\"prior's mean\",\n", |
301 | 301 | " linestyles=\"--\")\n", |
302 | | - "plt.vlines(price_trace.mean(), 0, 1.1 * np.max(_hist[0]), label=\"posterior's mean\", linestyles=\"-.\")\n", |
303 | | - "plt.legend(loc=\"upper left\");" |
| 302 | + "plt.vlines(price_trace.mean(), 0, 1.1 * np.max(_hist[0]),\n", |
| 303 | + " label=\"posterior's mean\", linestyles=\"-.\")\n", |
| 304 | + "plt.legend(loc=\"upper left\")" |
304 | 305 | ], |
305 | 306 | "language": "python", |
306 | 307 | "metadata": {}, |
|
361 | 362 | "\n", |
362 | 363 | "\n", |
363 | 364 | "def showdown_loss(guess, true_price, risk=80000):\n", |
364 | | - " loss = np.zeros_like(true_price)\n", |
365 | | - " ix = true_price < guess\n", |
366 | | - " loss[~ix] = np.abs(guess - true_price[~ix])\n", |
367 | | - " close_mask = [abs(true_price - guess) <= 250]\n", |
368 | | - " loss[close_mask] = -2 * true_price[close_mask]\n", |
369 | | - " loss[ix] = risk\n", |
370 | | - " return loss\n", |
| 365 | + " loss = np.zeros_like(true_price)\n", |
| 366 | + " ix = true_price < guess\n", |
| 367 | + " loss[~ix] = np.abs(guess - true_price[~ix])\n", |
| 368 | + " close_mask = [abs(true_price - guess) <= 250]\n", |
| 369 | + " loss[close_mask] = -2 * true_price[close_mask]\n", |
| 370 | + " loss[ix] = risk\n", |
| 371 | + " return loss\n", |
| 372 | + "\n", |
371 | 373 | "\n", |
372 | 374 | "guesses = np.linspace(5000, 50000, 70)\n", |
373 | 375 | "risks = np.linspace(30000, 150000, 6)\n", |
374 | | - "expected_loss = lambda guess, risk: showdown_loss(guess, price_trace, risk).mean()\n", |
| 376 | + "expected_loss = lambda guess, risk: \\\n", |
| 377 | + " showdown_loss(guess, price_trace, risk).mean()\n", |
375 | 378 | "\n", |
376 | 379 | "for _p in risks:\n", |
377 | 380 | " results = [expected_loss(_g, _p) for _g in guesses]\n", |
378 | 381 | " plt.plot(guesses, results, label=\"%d\" % _p)\n", |
379 | 382 | "\n", |
380 | | - "plt.title(\"Expected loss of different guesses, \\nvarious risk-levels of overestimating\")\n", |
| 383 | + "plt.title(\"Expected loss of different guesses, \\nvarious risk-levels of \\\n", |
| 384 | + "overestimating\")\n", |
381 | 385 | "plt.legend(loc=\"upper left\", title=\"Risk parameter\")\n", |
382 | 386 | "plt.xlabel(\"price bid\")\n", |
383 | 387 | "plt.ylabel(\"expected loss\")\n", |
|
420 | 424 | "\n", |
421 | 425 | "ax = plt.subplot(111)\n", |
422 | 426 | "\n", |
| 427 | + "\n", |
423 | 428 | "for _p in risks:\n", |
424 | 429 | " _color = ax._get_lines.color_cycle.next()\n", |
425 | 430 | " _min_results = sop.fmin(expected_loss, 15000, args=(_p,), disp=False)\n", |
426 | 431 | " _results = [expected_loss(_g, _p) for _g in guesses]\n", |
427 | 432 | " plt.plot(guesses, _results, color=_color)\n", |
428 | | - " plt.scatter(_min_results, 0, s=60, color=_color, label=\"%d\" % _p)\n", |
| 433 | + " plt.scatter(_min_results, 0, s=60,\n", |
| 434 | + " color=_color, label=\"%d\" % _p)\n", |
429 | 435 | " plt.vlines(_min_results, 0, 120000, color=_color, linestyles=\"--\")\n", |
430 | 436 | " print \"minimum at risk %d: %.2f\" % (_p, _min_results)\n", |
431 | 437 | "\n", |
432 | | - "plt.title(\"Expected loss & Bayes actions of different guesses, \\n various risk-levels of overestimating\")\n", |
| 438 | + "plt.title(\"Expected loss & Bayes actions of different guesses, \\n \\\n", |
| 439 | + "various risk-levels of overestimating\")\n", |
433 | 440 | "plt.legend(loc=\"upper left\", scatterpoints=1, title=\"Bayes action at risk:\")\n", |
434 | 441 | "plt.xlabel(\"price guess\")\n", |
435 | 442 | "plt.ylabel(\"expected loss\")\n", |
436 | 443 | "plt.xlim(7000, 30000)\n", |
437 | | - "plt.ylim(-1000, 80000);" |
| 444 | + "plt.ylim(-1000, 80000)" |
438 | 445 | ], |
439 | 446 | "language": "python", |
440 | 447 | "metadata": {}, |
|
575 | 582 | "\n", |
576 | 583 | "def stock_loss(true_return, yhat, alpha=100.):\n", |
577 | 584 | " if true_return * yhat < 0:\n", |
578 | | - " # opposite signs, not good\n", |
579 | | - " return alpha * yhat ** 2 - np.sign(true_return) * yhat + abs(true_return)\n", |
| 585 | + " # opposite signs, not good\n", |
| 586 | + " return alpha * yhat ** 2 - np.sign(true_return) * yhat \\\n", |
| 587 | + " + abs(true_return)\n", |
580 | 588 | " else:\n", |
581 | 589 | " return abs(true_return - yhat)\n", |
582 | 590 | "\n", |
| 591 | + "\n", |
583 | 592 | "true_value = .05\n", |
584 | 593 | "pred = np.linspace(-.04, .12, 75)\n", |
585 | 594 | "\n", |
586 | | - "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], label=\"Loss associated with\\n prediction if true value=0.05\", lw=3)\n", |
| 595 | + "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred],\n", |
| 596 | + " label=\"Loss associated with\\n prediction if true value = 0.05\", lw=3)\n", |
587 | 597 | "plt.vlines(0, 0, .25, linestyles=\"--\")\n", |
588 | 598 | "\n", |
589 | 599 | "plt.xlabel(\"prediction\")\n", |
|
592 | 602 | "plt.ylim(0, 0.25)\n", |
593 | 603 | "\n", |
594 | 604 | "true_value = -.02\n", |
595 | | - "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], alpha=0.6, label=\"Loss associated with\\n prediction if true value=-0.02\", lw=3)\n", |
| 605 | + "plt.plot(pred, [stock_loss(true_value, _p) for _p in pred], alpha=0.6,\n", |
| 606 | + " label=\"Loss associated with\\n prediction if true value = -0.02\", lw=3)\n", |
596 | 607 | "plt.legend()\n", |
597 | | - "plt.title(\"Stock returns loss if true value=0.05, -0.02\");" |
| 608 | + "plt.title(\"Stock returns loss if true value = 0.05, -0.02\");" |
598 | 609 | ], |
599 | 610 | "language": "python", |
600 | 611 | "metadata": {}, |
|
695 | 706 | "mcmc = pm.MCMC([obs, beta, alpha, std, prec])\n", |
696 | 707 | "\n", |
697 | 708 | "mcmc.sample(100000, 80000)\n", |
698 | | - "mcplot(mcmc);" |
| 709 | + "mcplot(mcmc)" |
699 | 710 | ], |
700 | 711 | "language": "python", |
701 | 712 | "metadata": {}, |
|
800 | 811 | "\n", |
801 | 812 | "noise = 1. / np.sqrt(tau_samples) * np.random.randn(N)\n", |
802 | 813 | "\n", |
803 | | - "possible_outcomes = lambda signal: alpha_samples + beta_samples * signal + noise\n", |
| 814 | + "possible_outcomes = lambda signal: alpha_samples + beta_samples * signal \\\n", |
| 815 | + " + noise\n", |
804 | 816 | "\n", |
805 | 817 | "\n", |
806 | 818 | "opt_predictions = np.zeros(50)\n", |
|
894 | 906 | "from draw_sky2 import draw_sky\n", |
895 | 907 | "\n", |
896 | 908 | "n_sky = 3 # choose a file/sky to examine.\n", |
897 | | - "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/Training_Sky%d.csv\" % (n_sky),\n", |
898 | | - " dtype=None,\n", |
899 | | - " skip_header=1,\n", |
900 | | - " delimiter=\",\",\n", |
901 | | - " usecols=[1, 2, 3, 4])\n", |
| 909 | + "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/\\\n", |
| 910 | + "Training_Sky%d.csv\" % (n_sky),\n", |
| 911 | + " dtype=None,\n", |
| 912 | + " skip_header=1,\n", |
| 913 | + " delimiter=\",\",\n", |
| 914 | + " usecols=[1, 2, 3, 4])\n", |
902 | 915 | "print \"Data on galaxies in sky %d.\" % n_sky\n", |
903 | 916 | "print \"position_x, position_y, e_1, e_2 \"\n", |
904 | 917 | "print data[:3]\n", |
|
952 | 965 | "\n", |
953 | 966 | "and in PyMC, \n", |
954 | 967 | "\n", |
955 | | - " exp_mass_large = pm.Uniform( \"exp_mass_large\", 40, 180)\n", |
| 968 | + " exp_mass_large = pm.Uniform(\"exp_mass_large\", 40, 180)\n", |
956 | 969 | " @pm.deterministic\n", |
957 | 970 | " def mass_large(u = exp_mass_large):\n", |
958 | | - " return np.log( u )\n", |
| 971 | + " return np.log(u)\n", |
959 | 972 | "\n", |
960 | 973 | "(This is what we mean when we say *log*-uniform.) For smaller galaxies, Tim set the mass to be the logarithm of 20. Why did Tim not create a prior for the smaller mass, nor treat it as a unknown? I believe this decision was made to speed up convergence of the algorithm. This is not too restrictive, as by construction the smaller halos have less influence on the galaxies.\n", |
961 | 974 | "\n", |
|
1013 | 1026 | "\n", |
1014 | 1027 | "@pm.deterministic\n", |
1015 | 1028 | "def mean(mass=mass_large, h_pos=halo_position, glx_pos=data[:, :2]):\n", |
1016 | | - " return mass / f_distance(glx_pos, h_pos, 240) * tangential_distance(glx_pos, h_pos);" |
| 1029 | + " return mass / f_distance(glx_pos, h_pos, 240) *\\\n", |
| 1030 | + " tangential_distance(glx_pos, h_pos)" |
1017 | 1031 | ], |
1018 | 1032 | "language": "python", |
1019 | 1033 | "metadata": {}, |
|
1029 | 1043 | "mcmc = pm.MCMC([ellpty, mean, halo_position, mass_large])\n", |
1030 | 1044 | "map_ = pm.MAP([ellpty, mean, halo_position, mass_large])\n", |
1031 | 1045 | "map_.fit()\n", |
1032 | | - "mcmc.sample(200000, 140000, 3);" |
| 1046 | + "mcmc.sample(200000, 140000, 3)" |
1033 | 1047 | ], |
1034 | 1048 | "language": "python", |
1035 | 1049 | "metadata": {}, |
|
1101 | 1115 | " delimiter=\",\",\n", |
1102 | 1116 | " usecols=[1, 2, 3, 4, 5, 6, 7, 8, 9],\n", |
1103 | 1117 | " skip_header=1)\n", |
1104 | | - "print halo_data[n_sky];" |
| 1118 | + "print halo_data[n_sky]" |
1105 | 1119 | ], |
1106 | 1120 | "language": "python", |
1107 | 1121 | "metadata": {}, |
|
1139 | 1153 | " c=\"k\", s=70)\n", |
1140 | 1154 | "plt.legend(scatterpoints=1, loc=\"lower left\")\n", |
1141 | 1155 | "plt.xlim(0, 4200)\n", |
1142 | | - "plt.ylim(0, 4200)\n", |
| 1156 | + "plt.ylim(0, 4200);\n", |
1143 | 1157 | "\n", |
1144 | | - "print \"True halo location:\", halo_data[n_sky][3], halo_data[n_sky][4];" |
| 1158 | + "print \"True halo location:\", halo_data[n_sky][3], halo_data[n_sky][4]" |
1145 | 1159 | ], |
1146 | 1160 | "language": "python", |
1147 | 1161 | "metadata": {}, |
|
1173 | 1187 | "collapsed": false, |
1174 | 1188 | "input": [ |
1175 | 1189 | "mean_posterior = t.mean(axis=0).reshape(1, 2)\n", |
1176 | | - "print mean_posterior;" |
| 1190 | + "print mean_posterior" |
1177 | 1191 | ], |
1178 | 1192 | "language": "python", |
1179 | 1193 | "metadata": {}, |
|
1204 | 1218 | "sky_prediction = mean_posterior\n", |
1205 | 1219 | "\n", |
1206 | 1220 | "print \"Using the mean:\"\n", |
1207 | | - "main_score(nhalo_all, x_true_all, y_true_all, x_ref_all, y_ref_all, sky_prediction)\n", |
| 1221 | + "main_score(nhalo_all, x_true_all, y_true_all,\n", |
| 1222 | + " x_ref_all, y_ref_all, sky_prediction)\n", |
1208 | 1223 | "\n", |
1209 | 1224 | "# what's a bad score?\n", |
1210 | 1225 | "print\n", |
1211 | 1226 | "random_guess = np.random.randint(0, 4200, size=(1, 2))\n", |
1212 | 1227 | "print \"Using a random location:\", random_guess\n", |
1213 | | - "main_score(nhalo_all, x_true_all, y_true_all, x_ref_all, y_ref_all, random_guess)\n", |
1214 | | - "print;" |
| 1228 | + "main_score(nhalo_all, x_true_all, y_true_all,\n", |
| 1229 | + " x_ref_all, y_ref_all, random_guess)\n", |
| 1230 | + "print" |
1215 | 1231 | ], |
1216 | 1232 | "language": "python", |
1217 | 1233 | "metadata": {}, |
|
1298 | 1314 | "\n", |
1299 | 1315 | " _sum = 0\n", |
1300 | 1316 | " for i in range(n_halos_in_sky):\n", |
1301 | | - " _sum += mass[i] / f_distance(glx_pos, h_pos[i, :], fdist_constants[i]) * tangential_distance(glx_pos, h_pos[i, :])\n", |
| 1317 | + " _sum += mass[i] / f_distance(glx_pos, h_pos[i, :], fdist_constants[i]) *\\\n", |
| 1318 | + " tangential_distance(glx_pos, h_pos[i, :])\n", |
1302 | 1319 | "\n", |
1303 | 1320 | " return _sum\n", |
1304 | 1321 | "\n", |
|
1310 | 1327 | "\n", |
1311 | 1328 | " mcmc = pm.MCMC([ellpty, mean, halo_positions, mass_large])\n", |
1312 | 1329 | " mcmc.sample(samples, burn_in, thin)\n", |
1313 | | - " return mcmc.trace(\"halo_positions\")[:];" |
| 1330 | + " return mcmc.trace(\"halo_positions\")[:]" |
1314 | 1331 | ], |
1315 | 1332 | "language": "python", |
1316 | 1333 | "metadata": {}, |
|
1322 | 1339 | "collapsed": false, |
1323 | 1340 | "input": [ |
1324 | 1341 | "n_sky = 215\n", |
1325 | | - "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/Training_Sky%d.csv\" % (n_sky),\n", |
1326 | | - " dtype=None,\n", |
1327 | | - " skip_header=1,\n", |
1328 | | - " delimiter=\",\",\n", |
1329 | | - " usecols=[1, 2, 3, 4]);" |
| 1342 | + "data = np.genfromtxt(\"data/Train_Skies/Train_Skies/\\\n", |
| 1343 | + "Training_Sky%d.csv\" % (n_sky),\n", |
| 1344 | + " dtype=None,\n", |
| 1345 | + " skip_header=1,\n", |
| 1346 | + " delimiter=\",\",\n", |
| 1347 | + " usecols=[1, 2, 3, 4])" |
1330 | 1348 | ], |
1331 | 1349 | "language": "python", |
1332 | 1350 | "metadata": {}, |
|
1341 | 1359 | "samples = 10.5e5\n", |
1342 | 1360 | "traces = halo_posteriors(3, data, samples=samples,\n", |
1343 | 1361 | " burn_in=9.5e5,\n", |
1344 | | - " thin=10);" |
| 1362 | + " thin=10)" |
1345 | 1363 | ], |
1346 | 1364 | "language": "python", |
1347 | 1365 | "metadata": {}, |
|
1368 | 1386 | "cell_type": "code", |
1369 | 1387 | "collapsed": false, |
1370 | 1388 | "input": [ |
| 1389 | + "\n", |
1371 | 1390 | "fig = draw_sky(data)\n", |
1372 | 1391 | "plt.title(\"Galaxy positions and ellipcities of sky %d.\" % n_sky)\n", |
1373 | 1392 | "plt.xlabel(\"x-position\")\n", |
|
1384 | 1403 | " label=\"True halo position\",\n", |
1385 | 1404 | " c=\"k\", s=90)\n", |
1386 | 1405 | "\n", |
1387 | | - "# plt.legend(scatterpoints=1)\n", |
| 1406 | + "# plt.legend(scatterpoints = 1)\n", |
1388 | 1407 | "plt.xlim(0, 4200)\n", |
1389 | 1408 | "plt.ylim(0, 4200);" |
1390 | 1409 | ], |
|
1424 | 1443 | "mean_posterior = traces.mean(axis=0).reshape(1, 4)\n", |
1425 | 1444 | "print mean_posterior\n", |
1426 | 1445 | "\n", |
| 1446 | + "\n", |
1427 | 1447 | "nhalo_all = _halo_data[0].reshape(1, 1)\n", |
1428 | 1448 | "x_true_all = _halo_data[3].reshape(1, 1)\n", |
1429 | 1449 | "y_true_all = _halo_data[4].reshape(1, 1)\n", |
1430 | 1450 | "x_ref_all = _halo_data[1].reshape(1, 1)\n", |
1431 | 1451 | "y_ref_all = _halo_data[2].reshape(1, 1)\n", |
1432 | 1452 | "sky_prediction = mean_posterior\n", |
1433 | 1453 | "\n", |
| 1454 | + "\n", |
1434 | 1455 | "print \"Using the mean:\"\n", |
1435 | | - "main_score([1], x_true_all, y_true_all, x_ref_all, y_ref_all, sky_prediction)\n", |
| 1456 | + "main_score([1], x_true_all, y_true_all,\n", |
| 1457 | + " x_ref_all, y_ref_all, sky_prediction)\n", |
1436 | 1458 | "\n", |
1437 | 1459 | "# what's a bad score?\n", |
1438 | 1460 | "print\n", |
1439 | 1461 | "random_guess = np.random.randint(0, 4200, size=(1, 2))\n", |
1440 | 1462 | "print \"Using a random location:\", random_guess\n", |
1441 | | - "main_score([1], x_true_all, y_true_all, x_ref_all, y_ref_all, random_guess)\n", |
1442 | | - "print;" |
| 1463 | + "main_score([1], x_true_all, y_true_all,\n", |
| 1464 | + " x_ref_all, y_ref_all, random_guess)\n", |
| 1465 | + "print" |
1443 | 1466 | ], |
1444 | 1467 | "language": "python", |
1445 | 1468 | "metadata": {}, |
|
1519 | 1542 | "def css_styling():\n", |
1520 | 1543 | " styles = open(\"../styles/custom.css\", \"r\").read()\n", |
1521 | 1544 | " return HTML(styles)\n", |
1522 | | - "css_styling();" |
| 1545 | + "css_styling()" |
1523 | 1546 | ], |
1524 | 1547 | "language": "python", |
1525 | 1548 | "metadata": {}, |
|
0 commit comments