|
300 | 300 | "import numpy as np\n", |
301 | 301 | "n_data_points = 5 # in CH1 we had ~70 data points\n", |
302 | 302 | "\n", |
| 303 | + "\n", |
303 | 304 | "@pm.deterministic\n", |
304 | 305 | "def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):\n", |
305 | 306 | " out = np.zeros(n_data_points)\n", |
|
532 | 533 | "cell_type": "code", |
533 | 534 | "collapsed": false, |
534 | 535 | "input": [ |
535 | | - "alpha = 1./20.\n", |
| 536 | + "alpha = 1. / 20.\n", |
536 | 537 | "lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n", |
537 | 538 | "print lambda_1, lambda_2" |
538 | 539 | ], |
|
579 | 580 | "collapsed": false, |
580 | 581 | "input": [ |
581 | 582 | "plt.bar(np.arange(80), data, color=\"#348ABD\")\n", |
582 | | - "plt.bar(tau-1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
| 583 | + "plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
583 | 584 | "plt.xlabel(\"Time (days)\")\n", |
584 | 585 | "plt.ylabel(\"count of text-msgs received\")\n", |
585 | 586 | "plt.title(\"Artificial dataset\")\n", |
|
616 | 617 | "input": [ |
617 | 618 | "def plot_artificial_sms_dataset():\n", |
618 | 619 | " tau = pm.rdiscrete_uniform(0, 80)\n", |
619 | | - " alpha = 1./20.\n", |
| 620 | + " alpha = 1. / 20.\n", |
620 | 621 | " lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n", |
621 | 622 | " data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]\n", |
622 | 623 | " plt.bar(np.arange(80), data, color=\"#348ABD\")\n", |
623 | | - " plt.bar(tau - 1, data[tau-1], color=\"r\", label=\"user behaviour changed\")\n", |
| 624 | + " plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", |
624 | 625 | " plt.xlim(0, 80)\n", |
625 | 626 | "\n", |
626 | 627 | "figsize(12.5, 5)\n", |
627 | 628 | "plt.title(\"More example of artificial datasets\")\n", |
628 | 629 | "for i in range(4):\n", |
629 | 630 | " plt.subplot(4, 1, i)\n", |
630 | | - " plot_artificial_sms_dataset()\n" |
| 631 | + " plot_artificial_sms_dataset()" |
631 | 632 | ], |
632 | 633 | "language": "python", |
633 | 634 | "metadata": {}, |
|
709 | 710 | "cell_type": "code", |
710 | 711 | "collapsed": false, |
711 | 712 | "input": [ |
712 | | - "#set constants\n", |
| 713 | + "# set constants\n", |
713 | 714 | "p_true = 0.05 # remember, this is unknown.\n", |
714 | 715 | "N = 1500\n", |
715 | 716 | "\n", |
|
775 | 776 | "cell_type": "code", |
776 | 777 | "collapsed": false, |
777 | 778 | "input": [ |
778 | | - "#include the observations, which are Bernoulli\n", |
| 779 | + "# include the observations, which are Bernoulli\n", |
779 | 780 | "obs = pm.Bernoulli(\"obs\", p, value=occurrences, observed=True)\n", |
780 | 781 | "\n", |
781 | | - "#To be explained in chapter 3\n", |
| 782 | + "# To be explained in chapter 3\n", |
782 | 783 | "mcmc = pm.MCMC([p, obs])\n", |
783 | 784 | "mcmc.sample(18000, 1000)" |
784 | 785 | ], |
|
860 | 861 | "import pymc as pm\n", |
861 | 862 | "figsize(12, 4)\n", |
862 | 863 | "\n", |
863 | | - "#these two quantities are unknown to us.\n", |
| 864 | + "# these two quantities are unknown to us.\n", |
864 | 865 | "true_p_A = 0.05\n", |
865 | 866 | "true_p_B = 0.04\n", |
866 | 867 | "\n", |
867 | | - "#notice the unequal sample sizes -- no problem in Bayesian analysis.\n", |
| 868 | + "# notice the unequal sample sizes -- no problem in Bayesian analysis.\n", |
868 | 869 | "N_A = 1500\n", |
869 | 870 | "N_B = 750\n", |
870 | 871 | "\n", |
871 | | - "#generate some observations\n", |
| 872 | + "# generate some observations\n", |
872 | 873 | "observations_A = pm.rbernoulli(true_p_A, N_A)\n", |
873 | 874 | "observations_B = pm.rbernoulli(true_p_B, N_B)\n", |
874 | 875 | "print \"Obs from Site A: \", observations_A[:30].astype(int), \"...\"\n", |
|
978 | 979 | "input": [ |
979 | 980 | "figsize(12.5, 10)\n", |
980 | 981 | "\n", |
981 | | - "#histogram of posteriors\n", |
| 982 | + "# histogram of posteriors\n", |
982 | 983 | "\n", |
983 | 984 | "ax = plt.subplot(311)\n", |
984 | 985 | "\n", |
|
1257 | 1258 | " fc=first_coin_flips,\n", |
1258 | 1259 | " sc=second_coin_flips):\n", |
1259 | 1260 | "\n", |
1260 | | - " observed = fc*t_a + (1-fc)*sc\n", |
| 1261 | + " observed = fc * t_a + (1 - fc) * sc\n", |
1261 | 1262 | " return observed.sum() / float(N)" |
1262 | 1263 | ], |
1263 | 1264 | "language": "python", |
|
1360 | 1361 | "input": [ |
1361 | 1362 | "figsize(12.5, 3)\n", |
1362 | 1363 | "p_trace = mcmc.trace(\"freq_cheating\")[:]\n", |
1363 | | - "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", |
| 1364 | + "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", |
1364 | 1365 | " label=\"posterior distribution\", color=\"#348ABD\")\n", |
1365 | 1366 | "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)\n", |
1366 | 1367 | "plt.xlim(0, 1)\n", |
|
1415 | 1416 | "input": [ |
1416 | 1417 | "p = pm.Uniform(\"freq_cheating\", 0, 1)\n", |
1417 | 1418 | "\n", |
| 1419 | + "\n", |
1418 | 1420 | "@pm.deterministic\n", |
1419 | 1421 | "def p_skewed(p=p):\n", |
1420 | | - " return 0.5*p + 0.25" |
| 1422 | + " return 0.5 * p + 0.25" |
1421 | 1423 | ], |
1422 | 1424 | "language": "python", |
1423 | 1425 | "metadata": {}, |
|
1491 | 1493 | "input": [ |
1492 | 1494 | "figsize(12.5, 3)\n", |
1493 | 1495 | "p_trace = mcmc.trace(\"freq_cheating\")[:]\n", |
1494 | | - "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", |
| 1496 | + "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", |
1495 | 1497 | " label=\"posterior distribution\", color=\"#348ABD\")\n", |
1496 | 1498 | "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.2)\n", |
1497 | 1499 | "plt.xlim(0, 1)\n", |
|
1539 | 1541 | "N = 10\n", |
1540 | 1542 | "x = np.empty(N, dtype=object)\n", |
1541 | 1543 | "for i in range(0, N):\n", |
1542 | | - " x[i] = pm.Exponential('x_%i' % i, (i+1)**2)" |
| 1544 | + " x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)" |
1543 | 1545 | ], |
1544 | 1546 | "language": "python", |
1545 | 1547 | "metadata": {}, |
|
1575 | 1577 | "challenger_data = np.genfromtxt(\"data/challenger_data.csv\", skip_header=1,\n", |
1576 | 1578 | " usecols=[1, 2], missing_values=\"NA\",\n", |
1577 | 1579 | " delimiter=\",\")\n", |
1578 | | - "#drop the NA values\n", |
| 1580 | + "# drop the NA values\n", |
1579 | 1581 | "challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]\n", |
1580 | 1582 | "\n", |
1581 | | - "#plot it, as a function of tempature (the first column)\n", |
| 1583 | + "# plot it, as a function of tempature (the first column)\n", |
1582 | 1584 | "print \"Temp (F), O-Ring failure?\"\n", |
1583 | 1585 | "print challenger_data\n", |
1584 | 1586 | "\n", |
|
1587 | 1589 | "plt.yticks([0, 1])\n", |
1588 | 1590 | "plt.ylabel(\"Damage Incident?\")\n", |
1589 | 1591 | "plt.xlabel(\"Outside temperature (Fahrenheit)\")\n", |
1590 | | - "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")\n" |
| 1592 | + "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")" |
1591 | 1593 | ], |
1592 | 1594 | "language": "python", |
1593 | 1595 | "metadata": {}, |
|
1769 | 1771 | "parameters = zip(mu, tau, colors)\n", |
1770 | 1772 | "\n", |
1771 | 1773 | "for _mu, _tau, _color in parameters:\n", |
1772 | | - " plt.plot(x, nor.pdf(x, _mu, scale=1./_tau),\n", |
| 1774 | + " plt.plot(x, nor.pdf(x, _mu, scale=1. / _tau),\n", |
1773 | 1775 | " label=\"$\\mu = %d,\\;\\\\tau = %.1f$\" % (_mu, _tau), color=_color)\n", |
1774 | | - " plt.fill_between(x, nor.pdf(x, _mu, scale=1./_tau), color=_color,\n", |
| 1776 | + " plt.fill_between(x, nor.pdf(x, _mu, scale=1. / _tau), color=_color,\n", |
1775 | 1777 | " alpha=.33)\n", |
1776 | 1778 | "\n", |
1777 | 1779 | "plt.legend(loc=\"upper right\")\n", |
|
1820 | 1822 | "temperature = challenger_data[:, 0]\n", |
1821 | 1823 | "D = challenger_data[:, 1] # defect or not?\n", |
1822 | 1824 | "\n", |
1823 | | - "#notice the`value` here. We explain why below.\n", |
| 1825 | + "# notice the`value` here. We explain why below.\n", |
1824 | 1826 | "beta = pm.Normal(\"beta\", 0, 0.001, value=0)\n", |
1825 | 1827 | "alpha = pm.Normal(\"alpha\", 0, 0.001, value=0)\n", |
1826 | 1828 | "\n", |
1827 | 1829 | "\n", |
1828 | 1830 | "@pm.deterministic\n", |
1829 | 1831 | "def p(t=temperature, alpha=alpha, beta=beta):\n", |
1830 | | - " return 1.0 / (1. + np.exp(beta*t + alpha))\n" |
| 1832 | + " return 1.0 / (1. + np.exp(beta * t + alpha))" |
1831 | 1833 | ], |
1832 | 1834 | "language": "python", |
1833 | 1835 | "metadata": {}, |
|
1920 | 1922 | "\n", |
1921 | 1923 | "figsize(12.5, 6)\n", |
1922 | 1924 | "\n", |
1923 | | - "#histogram of the samples:\n", |
| 1925 | + "# histogram of the samples:\n", |
1924 | 1926 | "plt.subplot(211)\n", |
1925 | 1927 | "plt.title(r\"Posterior distributions of the variables $\\alpha, \\beta$\")\n", |
1926 | 1928 | "plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,\n", |
|
1963 | 1965 | "cell_type": "code", |
1964 | 1966 | "collapsed": false, |
1965 | 1967 | "input": [ |
1966 | | - "t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]\n", |
| 1968 | + "t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]\n", |
1967 | 1969 | "p_t = logistic(t.T, beta_samples, alpha_samples)\n", |
1968 | 1970 | "\n", |
1969 | 1971 | "mean_prob_t = p_t.mean(axis=0)" |
|
2164 | 2166 | "plt.title(\"Simulated dataset using posterior parameters\")\n", |
2165 | 2167 | "figsize(12.5, 6)\n", |
2166 | 2168 | "for i in range(4):\n", |
2167 | | - " ax = plt.subplot(4, 1, i+1)\n", |
2168 | | - " plt.scatter(temperature, simulations[1000*i, :], color=\"k\",\n", |
| 2169 | + " ax = plt.subplot(4, 1, i + 1)\n", |
| 2170 | + " plt.scatter(temperature, simulations[1000 * i, :], color=\"k\",\n", |
2169 | 2171 | " s=50, alpha=0.6)" |
2170 | 2172 | ], |
2171 | 2173 | "language": "python", |
|
2369 | 2371 | "plt.title(\"Random model\")\n", |
2370 | 2372 | "\n", |
2371 | 2373 | "# constant model\n", |
2372 | | - "constant_prob = 7./23*np.ones(23)\n", |
| 2374 | + "constant_prob = 7. / 23 * np.ones(23)\n", |
2373 | 2375 | "separation_plot(constant_prob, D)\n", |
2374 | 2376 | "plt.title(\"Constant-prediction model\")" |
2375 | 2377 | ], |
|
2448 | 2450 | "cell_type": "code", |
2449 | 2451 | "collapsed": false, |
2450 | 2452 | "input": [ |
2451 | | - "#type your code here.\n", |
| 2453 | + "# type your code here.\n", |
2452 | 2454 | "figsize(12.5, 4)\n", |
2453 | 2455 | "\n", |
2454 | 2456 | "plt.scatter(alpha_samples, beta_samples, alpha=0.1)\n", |
|
0 commit comments