| 
300 | 300 |       "import numpy as np\n",  | 
301 | 301 |       "n_data_points = 5  # in CH1 we had ~70 data points\n",  | 
302 | 302 |       "\n",  | 
 | 303 | +      "\n",  | 
303 | 304 |       "@pm.deterministic\n",  | 
304 | 305 |       "def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):\n",  | 
305 | 306 |       "    out = np.zeros(n_data_points)\n",  | 
 | 
532 | 533 |      "cell_type": "code",  | 
533 | 534 |      "collapsed": false,  | 
534 | 535 |      "input": [  | 
535 |  | -      "alpha = 1./20.\n",  | 
 | 536 | +      "alpha = 1. / 20.\n",  | 
536 | 537 |       "lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n",  | 
537 | 538 |       "print lambda_1, lambda_2"  | 
538 | 539 |      ],  | 
 | 
579 | 580 |      "collapsed": false,  | 
580 | 581 |      "input": [  | 
581 | 582 |       "plt.bar(np.arange(80), data, color=\"#348ABD\")\n",  | 
582 |  | -      "plt.bar(tau-1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n",  | 
 | 583 | +      "plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n",  | 
583 | 584 |       "plt.xlabel(\"Time (days)\")\n",  | 
584 | 585 |       "plt.ylabel(\"count of text-msgs received\")\n",  | 
585 | 586 |       "plt.title(\"Artificial dataset\")\n",  | 
 | 
616 | 617 |      "input": [  | 
617 | 618 |       "def plot_artificial_sms_dataset():\n",  | 
618 | 619 |       "    tau = pm.rdiscrete_uniform(0, 80)\n",  | 
619 |  | -      "    alpha = 1./20.\n",  | 
 | 620 | +      "    alpha = 1. / 20.\n",  | 
620 | 621 |       "    lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n",  | 
621 | 622 |       "    data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]\n",  | 
622 | 623 |       "    plt.bar(np.arange(80), data, color=\"#348ABD\")\n",  | 
623 |  | -      "    plt.bar(tau - 1, data[tau-1], color=\"r\", label=\"user behaviour changed\")\n",  | 
 | 624 | +      "    plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n",  | 
624 | 625 |       "    plt.xlim(0, 80)\n",  | 
625 | 626 |       "\n",  | 
626 | 627 |       "figsize(12.5, 5)\n",  | 
627 | 628 |       "plt.title(\"More example of artificial datasets\")\n",  | 
628 | 629 |       "for i in range(4):\n",  | 
629 | 630 |       "    plt.subplot(4, 1, i)\n",  | 
630 |  | -      "    plot_artificial_sms_dataset()\n"  | 
 | 631 | +      "    plot_artificial_sms_dataset()"  | 
631 | 632 |      ],  | 
632 | 633 |      "language": "python",  | 
633 | 634 |      "metadata": {},  | 
 | 
709 | 710 |      "cell_type": "code",  | 
710 | 711 |      "collapsed": false,  | 
711 | 712 |      "input": [  | 
712 |  | -      "#set constants\n",  | 
 | 713 | +      "# set constants\n",  | 
713 | 714 |       "p_true = 0.05  # remember, this is unknown.\n",  | 
714 | 715 |       "N = 1500\n",  | 
715 | 716 |       "\n",  | 
 | 
775 | 776 |      "cell_type": "code",  | 
776 | 777 |      "collapsed": false,  | 
777 | 778 |      "input": [  | 
778 |  | -      "#include the observations, which are Bernoulli\n",  | 
 | 779 | +      "# include the observations, which are Bernoulli\n",  | 
779 | 780 |       "obs = pm.Bernoulli(\"obs\", p, value=occurrences, observed=True)\n",  | 
780 | 781 |       "\n",  | 
781 |  | -      "#To be explained in chapter 3\n",  | 
 | 782 | +      "# To be explained in chapter 3\n",  | 
782 | 783 |       "mcmc = pm.MCMC([p, obs])\n",  | 
783 | 784 |       "mcmc.sample(18000, 1000)"  | 
784 | 785 |      ],  | 
 | 
860 | 861 |       "import pymc as pm\n",  | 
861 | 862 |       "figsize(12, 4)\n",  | 
862 | 863 |       "\n",  | 
863 |  | -      "#these two quantities are unknown to us.\n",  | 
 | 864 | +      "# these two quantities are unknown to us.\n",  | 
864 | 865 |       "true_p_A = 0.05\n",  | 
865 | 866 |       "true_p_B = 0.04\n",  | 
866 | 867 |       "\n",  | 
867 |  | -      "#notice the unequal sample sizes -- no problem in Bayesian analysis.\n",  | 
 | 868 | +      "# notice the unequal sample sizes -- no problem in Bayesian analysis.\n",  | 
868 | 869 |       "N_A = 1500\n",  | 
869 | 870 |       "N_B = 750\n",  | 
870 | 871 |       "\n",  | 
871 |  | -      "#generate some observations\n",  | 
 | 872 | +      "# generate some observations\n",  | 
872 | 873 |       "observations_A = pm.rbernoulli(true_p_A, N_A)\n",  | 
873 | 874 |       "observations_B = pm.rbernoulli(true_p_B, N_B)\n",  | 
874 | 875 |       "print \"Obs from Site A: \", observations_A[:30].astype(int), \"...\"\n",  | 
 | 
978 | 979 |      "input": [  | 
979 | 980 |       "figsize(12.5, 10)\n",  | 
980 | 981 |       "\n",  | 
981 |  | -      "#histogram of posteriors\n",  | 
 | 982 | +      "# histogram of posteriors\n",  | 
982 | 983 |       "\n",  | 
983 | 984 |       "ax = plt.subplot(311)\n",  | 
984 | 985 |       "\n",  | 
 | 
1257 | 1258 |       "                        fc=first_coin_flips,\n",  | 
1258 | 1259 |       "                        sc=second_coin_flips):\n",  | 
1259 | 1260 |       "\n",  | 
1260 |  | -      "    observed = fc*t_a + (1-fc)*sc\n",  | 
 | 1261 | +      "    observed = fc * t_a + (1 - fc) * sc\n",  | 
1261 | 1262 |       "    return observed.sum() / float(N)"  | 
1262 | 1263 |      ],  | 
1263 | 1264 |      "language": "python",  | 
 | 
1360 | 1361 |      "input": [  | 
1361 | 1362 |       "figsize(12.5, 3)\n",  | 
1362 | 1363 |       "p_trace = mcmc.trace(\"freq_cheating\")[:]\n",  | 
1363 |  | -      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n",  | 
 | 1364 | +      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n",  | 
1364 | 1365 |       "         label=\"posterior distribution\", color=\"#348ABD\")\n",  | 
1365 | 1366 |       "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)\n",  | 
1366 | 1367 |       "plt.xlim(0, 1)\n",  | 
 | 
1415 | 1416 |      "input": [  | 
1416 | 1417 |       "p = pm.Uniform(\"freq_cheating\", 0, 1)\n",  | 
1417 | 1418 |       "\n",  | 
 | 1419 | +      "\n",  | 
1418 | 1420 |       "@pm.deterministic\n",  | 
1419 | 1421 |       "def p_skewed(p=p):\n",  | 
1420 |  | -      "    return 0.5*p + 0.25"  | 
 | 1422 | +      "    return 0.5 * p + 0.25"  | 
1421 | 1423 |      ],  | 
1422 | 1424 |      "language": "python",  | 
1423 | 1425 |      "metadata": {},  | 
 | 
1491 | 1493 |      "input": [  | 
1492 | 1494 |       "figsize(12.5, 3)\n",  | 
1493 | 1495 |       "p_trace = mcmc.trace(\"freq_cheating\")[:]\n",  | 
1494 |  | -      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n",  | 
 | 1496 | +      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n",  | 
1495 | 1497 |       "         label=\"posterior distribution\", color=\"#348ABD\")\n",  | 
1496 | 1498 |       "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.2)\n",  | 
1497 | 1499 |       "plt.xlim(0, 1)\n",  | 
 | 
1539 | 1541 |       "N = 10\n",  | 
1540 | 1542 |       "x = np.empty(N, dtype=object)\n",  | 
1541 | 1543 |       "for i in range(0, N):\n",  | 
1542 |  | -      "    x[i] = pm.Exponential('x_%i' % i, (i+1)**2)"  | 
 | 1544 | +      "    x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)"  | 
1543 | 1545 |      ],  | 
1544 | 1546 |      "language": "python",  | 
1545 | 1547 |      "metadata": {},  | 
 | 
1575 | 1577 |       "challenger_data = np.genfromtxt(\"data/challenger_data.csv\", skip_header=1,\n",  | 
1576 | 1578 |       "                                usecols=[1, 2], missing_values=\"NA\",\n",  | 
1577 | 1579 |       "                                delimiter=\",\")\n",  | 
1578 |  | -      "#drop the NA values\n",  | 
 | 1580 | +      "# drop the NA values\n",  | 
1579 | 1581 |       "challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]\n",  | 
1580 | 1582 |       "\n",  | 
1581 |  | -      "#plot it, as a function of tempature (the first column)\n",  | 
 | 1583 | +      "# plot it, as a function of tempature (the first column)\n",  | 
1582 | 1584 |       "print \"Temp (F), O-Ring failure?\"\n",  | 
1583 | 1585 |       "print challenger_data\n",  | 
1584 | 1586 |       "\n",  | 
 | 
1587 | 1589 |       "plt.yticks([0, 1])\n",  | 
1588 | 1590 |       "plt.ylabel(\"Damage Incident?\")\n",  | 
1589 | 1591 |       "plt.xlabel(\"Outside temperature (Fahrenheit)\")\n",  | 
1590 |  | -      "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")\n"  | 
 | 1592 | +      "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")"  | 
1591 | 1593 |      ],  | 
1592 | 1594 |      "language": "python",  | 
1593 | 1595 |      "metadata": {},  | 
 | 
1769 | 1771 |       "parameters = zip(mu, tau, colors)\n",  | 
1770 | 1772 |       "\n",  | 
1771 | 1773 |       "for _mu, _tau, _color in parameters:\n",  | 
1772 |  | -      "    plt.plot(x, nor.pdf(x, _mu, scale=1./_tau),\n",  | 
 | 1774 | +      "    plt.plot(x, nor.pdf(x, _mu, scale=1. / _tau),\n",  | 
1773 | 1775 |       "             label=\"$\\mu = %d,\\;\\\\tau = %.1f$\" % (_mu, _tau), color=_color)\n",  | 
1774 |  | -      "    plt.fill_between(x, nor.pdf(x, _mu, scale=1./_tau), color=_color,\n",  | 
 | 1776 | +      "    plt.fill_between(x, nor.pdf(x, _mu, scale=1. / _tau), color=_color,\n",  | 
1775 | 1777 |       "                     alpha=.33)\n",  | 
1776 | 1778 |       "\n",  | 
1777 | 1779 |       "plt.legend(loc=\"upper right\")\n",  | 
 | 
1820 | 1822 |       "temperature = challenger_data[:, 0]\n",  | 
1821 | 1823 |       "D = challenger_data[:, 1]  # defect or not?\n",  | 
1822 | 1824 |       "\n",  | 
1823 |  | -      "#notice the`value` here. We explain why below.\n",  | 
 | 1825 | +      "# notice the`value` here. We explain why below.\n",  | 
1824 | 1826 |       "beta = pm.Normal(\"beta\", 0, 0.001, value=0)\n",  | 
1825 | 1827 |       "alpha = pm.Normal(\"alpha\", 0, 0.001, value=0)\n",  | 
1826 | 1828 |       "\n",  | 
1827 | 1829 |       "\n",  | 
1828 | 1830 |       "@pm.deterministic\n",  | 
1829 | 1831 |       "def p(t=temperature, alpha=alpha, beta=beta):\n",  | 
1830 |  | -      "    return 1.0 / (1. + np.exp(beta*t + alpha))\n"  | 
 | 1832 | +      "    return 1.0 / (1. + np.exp(beta * t + alpha))"  | 
1831 | 1833 |      ],  | 
1832 | 1834 |      "language": "python",  | 
1833 | 1835 |      "metadata": {},  | 
 | 
1920 | 1922 |       "\n",  | 
1921 | 1923 |       "figsize(12.5, 6)\n",  | 
1922 | 1924 |       "\n",  | 
1923 |  | -      "#histogram of the samples:\n",  | 
 | 1925 | +      "# histogram of the samples:\n",  | 
1924 | 1926 |       "plt.subplot(211)\n",  | 
1925 | 1927 |       "plt.title(r\"Posterior distributions of the variables $\\alpha, \\beta$\")\n",  | 
1926 | 1928 |       "plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,\n",  | 
 | 
1963 | 1965 |      "cell_type": "code",  | 
1964 | 1966 |      "collapsed": false,  | 
1965 | 1967 |      "input": [  | 
1966 |  | -      "t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]\n",  | 
 | 1968 | +      "t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]\n",  | 
1967 | 1969 |       "p_t = logistic(t.T, beta_samples, alpha_samples)\n",  | 
1968 | 1970 |       "\n",  | 
1969 | 1971 |       "mean_prob_t = p_t.mean(axis=0)"  | 
 | 
2164 | 2166 |       "plt.title(\"Simulated dataset using posterior parameters\")\n",  | 
2165 | 2167 |       "figsize(12.5, 6)\n",  | 
2166 | 2168 |       "for i in range(4):\n",  | 
2167 |  | -      "    ax = plt.subplot(4, 1, i+1)\n",  | 
2168 |  | -      "    plt.scatter(temperature, simulations[1000*i, :], color=\"k\",\n",  | 
 | 2169 | +      "    ax = plt.subplot(4, 1, i + 1)\n",  | 
 | 2170 | +      "    plt.scatter(temperature, simulations[1000 * i, :], color=\"k\",\n",  | 
2169 | 2171 |       "                s=50, alpha=0.6)"  | 
2170 | 2172 |      ],  | 
2171 | 2173 |      "language": "python",  | 
 | 
2369 | 2371 |       "plt.title(\"Random model\")\n",  | 
2370 | 2372 |       "\n",  | 
2371 | 2373 |       "# constant model\n",  | 
2372 |  | -      "constant_prob = 7./23*np.ones(23)\n",  | 
 | 2374 | +      "constant_prob = 7. / 23 * np.ones(23)\n",  | 
2373 | 2375 |       "separation_plot(constant_prob, D)\n",  | 
2374 | 2376 |       "plt.title(\"Constant-prediction model\")"  | 
2375 | 2377 |      ],  | 
 | 
2448 | 2450 |      "cell_type": "code",  | 
2449 | 2451 |      "collapsed": false,  | 
2450 | 2452 |      "input": [  | 
2451 |  | -      "#type your code here.\n",  | 
 | 2453 | +      "# type your code here.\n",  | 
2452 | 2454 |       "figsize(12.5, 4)\n",  | 
2453 | 2455 |       "\n",  | 
2454 | 2456 |       "plt.scatter(alpha_samples, beta_samples, alpha=0.1)\n",  | 
 | 
0 commit comments