|  | 
| 300 | 300 |       "import numpy as np\n", | 
| 301 | 301 |       "n_data_points = 5  # in CH1 we had ~70 data points\n", | 
| 302 | 302 |       "\n", | 
|  | 303 | +      "\n", | 
| 303 | 304 |       "@pm.deterministic\n", | 
| 304 | 305 |       "def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):\n", | 
| 305 | 306 |       "    out = np.zeros(n_data_points)\n", | 
|  | 
| 532 | 533 |      "cell_type": "code", | 
| 533 | 534 |      "collapsed": false, | 
| 534 | 535 |      "input": [ | 
| 535 |  | -      "alpha = 1./20.\n", | 
|  | 536 | +      "alpha = 1. / 20.\n", | 
| 536 | 537 |       "lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n", | 
| 537 | 538 |       "print lambda_1, lambda_2" | 
| 538 | 539 |      ], | 
|  | 
| 579 | 580 |      "collapsed": false, | 
| 580 | 581 |      "input": [ | 
| 581 | 582 |       "plt.bar(np.arange(80), data, color=\"#348ABD\")\n", | 
| 582 |  | -      "plt.bar(tau-1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", | 
|  | 583 | +      "plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", | 
| 583 | 584 |       "plt.xlabel(\"Time (days)\")\n", | 
| 584 | 585 |       "plt.ylabel(\"count of text-msgs received\")\n", | 
| 585 | 586 |       "plt.title(\"Artificial dataset\")\n", | 
|  | 
| 616 | 617 |      "input": [ | 
| 617 | 618 |       "def plot_artificial_sms_dataset():\n", | 
| 618 | 619 |       "    tau = pm.rdiscrete_uniform(0, 80)\n", | 
| 619 |  | -      "    alpha = 1./20.\n", | 
|  | 620 | +      "    alpha = 1. / 20.\n", | 
| 620 | 621 |       "    lambda_1, lambda_2 = pm.rexponential(alpha, 2)\n", | 
| 621 | 622 |       "    data = np.r_[pm.rpoisson(lambda_1, tau), pm.rpoisson(lambda_2, 80 - tau)]\n", | 
| 622 | 623 |       "    plt.bar(np.arange(80), data, color=\"#348ABD\")\n", | 
| 623 |  | -      "    plt.bar(tau - 1, data[tau-1], color=\"r\", label=\"user behaviour changed\")\n", | 
|  | 624 | +      "    plt.bar(tau - 1, data[tau - 1], color=\"r\", label=\"user behaviour changed\")\n", | 
| 624 | 625 |       "    plt.xlim(0, 80)\n", | 
| 625 | 626 |       "\n", | 
| 626 | 627 |       "figsize(12.5, 5)\n", | 
| 627 | 628 |       "plt.title(\"More example of artificial datasets\")\n", | 
| 628 | 629 |       "for i in range(4):\n", | 
| 629 | 630 |       "    plt.subplot(4, 1, i)\n", | 
| 630 |  | -      "    plot_artificial_sms_dataset()\n" | 
|  | 631 | +      "    plot_artificial_sms_dataset()" | 
| 631 | 632 |      ], | 
| 632 | 633 |      "language": "python", | 
| 633 | 634 |      "metadata": {}, | 
|  | 
| 709 | 710 |      "cell_type": "code", | 
| 710 | 711 |      "collapsed": false, | 
| 711 | 712 |      "input": [ | 
| 712 |  | -      "#set constants\n", | 
|  | 713 | +      "# set constants\n", | 
| 713 | 714 |       "p_true = 0.05  # remember, this is unknown.\n", | 
| 714 | 715 |       "N = 1500\n", | 
| 715 | 716 |       "\n", | 
|  | 
| 775 | 776 |      "cell_type": "code", | 
| 776 | 777 |      "collapsed": false, | 
| 777 | 778 |      "input": [ | 
| 778 |  | -      "#include the observations, which are Bernoulli\n", | 
|  | 779 | +      "# include the observations, which are Bernoulli\n", | 
| 779 | 780 |       "obs = pm.Bernoulli(\"obs\", p, value=occurrences, observed=True)\n", | 
| 780 | 781 |       "\n", | 
| 781 |  | -      "#To be explained in chapter 3\n", | 
|  | 782 | +      "# To be explained in chapter 3\n", | 
| 782 | 783 |       "mcmc = pm.MCMC([p, obs])\n", | 
| 783 | 784 |       "mcmc.sample(18000, 1000)" | 
| 784 | 785 |      ], | 
|  | 
| 860 | 861 |       "import pymc as pm\n", | 
| 861 | 862 |       "figsize(12, 4)\n", | 
| 862 | 863 |       "\n", | 
| 863 |  | -      "#these two quantities are unknown to us.\n", | 
|  | 864 | +      "# these two quantities are unknown to us.\n", | 
| 864 | 865 |       "true_p_A = 0.05\n", | 
| 865 | 866 |       "true_p_B = 0.04\n", | 
| 866 | 867 |       "\n", | 
| 867 |  | -      "#notice the unequal sample sizes -- no problem in Bayesian analysis.\n", | 
|  | 868 | +      "# notice the unequal sample sizes -- no problem in Bayesian analysis.\n", | 
| 868 | 869 |       "N_A = 1500\n", | 
| 869 | 870 |       "N_B = 750\n", | 
| 870 | 871 |       "\n", | 
| 871 |  | -      "#generate some observations\n", | 
|  | 872 | +      "# generate some observations\n", | 
| 872 | 873 |       "observations_A = pm.rbernoulli(true_p_A, N_A)\n", | 
| 873 | 874 |       "observations_B = pm.rbernoulli(true_p_B, N_B)\n", | 
| 874 | 875 |       "print \"Obs from Site A: \", observations_A[:30].astype(int), \"...\"\n", | 
|  | 
| 978 | 979 |      "input": [ | 
| 979 | 980 |       "figsize(12.5, 10)\n", | 
| 980 | 981 |       "\n", | 
| 981 |  | -      "#histogram of posteriors\n", | 
|  | 982 | +      "# histogram of posteriors\n", | 
| 982 | 983 |       "\n", | 
| 983 | 984 |       "ax = plt.subplot(311)\n", | 
| 984 | 985 |       "\n", | 
|  | 
| 1257 | 1258 |       "                        fc=first_coin_flips,\n", | 
| 1258 | 1259 |       "                        sc=second_coin_flips):\n", | 
| 1259 | 1260 |       "\n", | 
| 1260 |  | -      "    observed = fc*t_a + (1-fc)*sc\n", | 
|  | 1261 | +      "    observed = fc * t_a + (1 - fc) * sc\n", | 
| 1261 | 1262 |       "    return observed.sum() / float(N)" | 
| 1262 | 1263 |      ], | 
| 1263 | 1264 |      "language": "python", | 
|  | 
| 1360 | 1361 |      "input": [ | 
| 1361 | 1362 |       "figsize(12.5, 3)\n", | 
| 1362 | 1363 |       "p_trace = mcmc.trace(\"freq_cheating\")[:]\n", | 
| 1363 |  | -      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", | 
|  | 1364 | +      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", | 
| 1364 | 1365 |       "         label=\"posterior distribution\", color=\"#348ABD\")\n", | 
| 1365 | 1366 |       "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.3)\n", | 
| 1366 | 1367 |       "plt.xlim(0, 1)\n", | 
|  | 
| 1415 | 1416 |      "input": [ | 
| 1416 | 1417 |       "p = pm.Uniform(\"freq_cheating\", 0, 1)\n", | 
| 1417 | 1418 |       "\n", | 
|  | 1419 | +      "\n", | 
| 1418 | 1420 |       "@pm.deterministic\n", | 
| 1419 | 1421 |       "def p_skewed(p=p):\n", | 
| 1420 |  | -      "    return 0.5*p + 0.25" | 
|  | 1422 | +      "    return 0.5 * p + 0.25" | 
| 1421 | 1423 |      ], | 
| 1422 | 1424 |      "language": "python", | 
| 1423 | 1425 |      "metadata": {}, | 
|  | 
| 1491 | 1493 |      "input": [ | 
| 1492 | 1494 |       "figsize(12.5, 3)\n", | 
| 1493 | 1495 |       "p_trace = mcmc.trace(\"freq_cheating\")[:]\n", | 
| 1494 |  | -      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30, \n", | 
|  | 1496 | +      "plt.hist(p_trace, histtype=\"stepfilled\", normed=True, alpha=0.85, bins=30,\n", | 
| 1495 | 1497 |       "         label=\"posterior distribution\", color=\"#348ABD\")\n", | 
| 1496 | 1498 |       "plt.vlines([.05, .35], [0, 0], [5, 5], alpha=0.2)\n", | 
| 1497 | 1499 |       "plt.xlim(0, 1)\n", | 
|  | 
| 1539 | 1541 |       "N = 10\n", | 
| 1540 | 1542 |       "x = np.empty(N, dtype=object)\n", | 
| 1541 | 1543 |       "for i in range(0, N):\n", | 
| 1542 |  | -      "    x[i] = pm.Exponential('x_%i' % i, (i+1)**2)" | 
|  | 1544 | +      "    x[i] = pm.Exponential('x_%i' % i, (i + 1) ** 2)" | 
| 1543 | 1545 |      ], | 
| 1544 | 1546 |      "language": "python", | 
| 1545 | 1547 |      "metadata": {}, | 
|  | 
| 1575 | 1577 |       "challenger_data = np.genfromtxt(\"data/challenger_data.csv\", skip_header=1,\n", | 
| 1576 | 1578 |       "                                usecols=[1, 2], missing_values=\"NA\",\n", | 
| 1577 | 1579 |       "                                delimiter=\",\")\n", | 
| 1578 |  | -      "#drop the NA values\n", | 
|  | 1580 | +      "# drop the NA values\n", | 
| 1579 | 1581 |       "challenger_data = challenger_data[~np.isnan(challenger_data[:, 1])]\n", | 
| 1580 | 1582 |       "\n", | 
| 1581 |  | -      "#plot it, as a function of tempature (the first column)\n", | 
|  | 1583 | +      "# plot it, as a function of tempature (the first column)\n", | 
| 1582 | 1584 |       "print \"Temp (F), O-Ring failure?\"\n", | 
| 1583 | 1585 |       "print challenger_data\n", | 
| 1584 | 1586 |       "\n", | 
|  | 
| 1587 | 1589 |       "plt.yticks([0, 1])\n", | 
| 1588 | 1590 |       "plt.ylabel(\"Damage Incident?\")\n", | 
| 1589 | 1591 |       "plt.xlabel(\"Outside temperature (Fahrenheit)\")\n", | 
| 1590 |  | -      "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")\n" | 
|  | 1592 | +      "plt.title(\"Defects of the Space Shuttle O-Rings vs temperature\")" | 
| 1591 | 1593 |      ], | 
| 1592 | 1594 |      "language": "python", | 
| 1593 | 1595 |      "metadata": {}, | 
|  | 
| 1769 | 1771 |       "parameters = zip(mu, tau, colors)\n", | 
| 1770 | 1772 |       "\n", | 
| 1771 | 1773 |       "for _mu, _tau, _color in parameters:\n", | 
| 1772 |  | -      "    plt.plot(x, nor.pdf(x, _mu, scale=1./_tau),\n", | 
|  | 1774 | +      "    plt.plot(x, nor.pdf(x, _mu, scale=1. / _tau),\n", | 
| 1773 | 1775 |       "             label=\"$\\mu = %d,\\;\\\\tau = %.1f$\" % (_mu, _tau), color=_color)\n", | 
| 1774 |  | -      "    plt.fill_between(x, nor.pdf(x, _mu, scale=1./_tau), color=_color,\n", | 
|  | 1776 | +      "    plt.fill_between(x, nor.pdf(x, _mu, scale=1. / _tau), color=_color,\n", | 
| 1775 | 1777 |       "                     alpha=.33)\n", | 
| 1776 | 1778 |       "\n", | 
| 1777 | 1779 |       "plt.legend(loc=\"upper right\")\n", | 
|  | 
| 1820 | 1822 |       "temperature = challenger_data[:, 0]\n", | 
| 1821 | 1823 |       "D = challenger_data[:, 1]  # defect or not?\n", | 
| 1822 | 1824 |       "\n", | 
| 1823 |  | -      "#notice the`value` here. We explain why below.\n", | 
|  | 1825 | +      "# notice the`value` here. We explain why below.\n", | 
| 1824 | 1826 |       "beta = pm.Normal(\"beta\", 0, 0.001, value=0)\n", | 
| 1825 | 1827 |       "alpha = pm.Normal(\"alpha\", 0, 0.001, value=0)\n", | 
| 1826 | 1828 |       "\n", | 
| 1827 | 1829 |       "\n", | 
| 1828 | 1830 |       "@pm.deterministic\n", | 
| 1829 | 1831 |       "def p(t=temperature, alpha=alpha, beta=beta):\n", | 
| 1830 |  | -      "    return 1.0 / (1. + np.exp(beta*t + alpha))\n" | 
|  | 1832 | +      "    return 1.0 / (1. + np.exp(beta * t + alpha))" | 
| 1831 | 1833 |      ], | 
| 1832 | 1834 |      "language": "python", | 
| 1833 | 1835 |      "metadata": {}, | 
|  | 
| 1920 | 1922 |       "\n", | 
| 1921 | 1923 |       "figsize(12.5, 6)\n", | 
| 1922 | 1924 |       "\n", | 
| 1923 |  | -      "#histogram of the samples:\n", | 
|  | 1925 | +      "# histogram of the samples:\n", | 
| 1924 | 1926 |       "plt.subplot(211)\n", | 
| 1925 | 1927 |       "plt.title(r\"Posterior distributions of the variables $\\alpha, \\beta$\")\n", | 
| 1926 | 1928 |       "plt.hist(beta_samples, histtype='stepfilled', bins=35, alpha=0.85,\n", | 
|  | 
| 1963 | 1965 |      "cell_type": "code", | 
| 1964 | 1966 |      "collapsed": false, | 
| 1965 | 1967 |      "input": [ | 
| 1966 |  | -      "t = np.linspace(temperature.min() - 5, temperature.max()+5, 50)[:, None]\n", | 
|  | 1968 | +      "t = np.linspace(temperature.min() - 5, temperature.max() + 5, 50)[:, None]\n", | 
| 1967 | 1969 |       "p_t = logistic(t.T, beta_samples, alpha_samples)\n", | 
| 1968 | 1970 |       "\n", | 
| 1969 | 1971 |       "mean_prob_t = p_t.mean(axis=0)" | 
|  | 
| 2164 | 2166 |       "plt.title(\"Simulated dataset using posterior parameters\")\n", | 
| 2165 | 2167 |       "figsize(12.5, 6)\n", | 
| 2166 | 2168 |       "for i in range(4):\n", | 
| 2167 |  | -      "    ax = plt.subplot(4, 1, i+1)\n", | 
| 2168 |  | -      "    plt.scatter(temperature, simulations[1000*i, :], color=\"k\",\n", | 
|  | 2169 | +      "    ax = plt.subplot(4, 1, i + 1)\n", | 
|  | 2170 | +      "    plt.scatter(temperature, simulations[1000 * i, :], color=\"k\",\n", | 
| 2169 | 2171 |       "                s=50, alpha=0.6)" | 
| 2170 | 2172 |      ], | 
| 2171 | 2173 |      "language": "python", | 
|  | 
| 2369 | 2371 |       "plt.title(\"Random model\")\n", | 
| 2370 | 2372 |       "\n", | 
| 2371 | 2373 |       "# constant model\n", | 
| 2372 |  | -      "constant_prob = 7./23*np.ones(23)\n", | 
|  | 2374 | +      "constant_prob = 7. / 23 * np.ones(23)\n", | 
| 2373 | 2375 |       "separation_plot(constant_prob, D)\n", | 
| 2374 | 2376 |       "plt.title(\"Constant-prediction model\")" | 
| 2375 | 2377 |      ], | 
|  | 
| 2448 | 2450 |      "cell_type": "code", | 
| 2449 | 2451 |      "collapsed": false, | 
| 2450 | 2452 |      "input": [ | 
| 2451 |  | -      "#type your code here.\n", | 
|  | 2453 | +      "# type your code here.\n", | 
| 2452 | 2454 |       "figsize(12.5, 4)\n", | 
| 2453 | 2455 |       "\n", | 
| 2454 | 2456 |       "plt.scatter(alpha_samples, beta_samples, alpha=0.1)\n", | 
|  | 
0 commit comments