From 6536b1b5d4f54a6e45014fc589e5819b1582b9bb Mon Sep 17 00:00:00 2001
From: JuanPablo
Date: Sun, 3 Jan 2016 18:28:20 -0300
Subject: [PATCH 1/9] analyze_webstats with PEP8

---
 ch01/analyze_webstats.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ch01/analyze_webstats.py b/ch01/analyze_webstats.py
index 8d3c4c41..5da892e2 100644
--- a/ch01/analyze_webstats.py
+++ b/ch01/analyze_webstats.py
@@ -26,8 +26,9 @@
 x = x[~sp.isnan(y)]
 y = y[~sp.isnan(y)]
 
-# plot input data
+
 def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
+    ''' plot input data '''
 
     plt.figure(num=None, figsize=(8, 6))
     plt.clf()
@@ -138,8 +139,8 @@ def error(f, x, y):
 train = sorted(shuffled[split_idx:])
 fbt1 = sp.poly1d(sp.polyfit(xb[train], yb[train], 1))
 fbt2 = sp.poly1d(sp.polyfit(xb[train], yb[train], 2))
-print("fbt2(x)= \n%s"%fbt2)
-print("fbt2(x)-100,000= \n%s"%(fbt2-100000))
+print("fbt2(x)= \n%s" % fbt2)
+print("fbt2(x)-100,000= \n%s" % (fbt2-100000))
 fbt3 = sp.poly1d(sp.polyfit(xb[train], yb[train], 3))
 fbt10 = sp.poly1d(sp.polyfit(xb[train], yb[train], 10))
 fbt100 = sp.poly1d(sp.polyfit(xb[train], yb[train], 100))

From db795e08dc720fce5a1f327f1ce3de154cf2303c Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Tue, 8 Mar 2016 14:57:27 +0100
Subject: [PATCH 2/9] BUG Fix name of function chist

The function for computing color histograms is chist. Use it throughout.

closes #15
---
 ch10/neighbors.py             | 4 ++--
 ch10/simple_classification.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ch10/neighbors.py b/ch10/neighbors.py
index c62a0e2c..1f71d0de 100644
--- a/ch10/neighbors.py
+++ b/ch10/neighbors.py
@@ -6,7 +6,7 @@
 import numpy as np
 import mahotas as mh
 from glob import glob
-from features import texture, color_histogram
+from features import texture, chist
 from matplotlib import pyplot as plt
 from sklearn.preprocessing import StandardScaler
 from scipy.spatial import distance
@@ -29,7 +29,7 @@
     imc = mh.imread(fname)
     imc = imc[200:-200,200:-200]
     haralicks.append(texture(mh.colors.rgb2grey(imc)))
-    chists.append(color_histogram(imc))
+    chists.append(chist(imc))
 
 haralicks = np.array(haralicks)
 chists = np.array(chists)

diff --git a/ch10/simple_classification.py b/ch10/simple_classification.py
index 0e3ab347..a5a448d2 100644
--- a/ch10/simple_classification.py
+++ b/ch10/simple_classification.py
@@ -9,7 +9,7 @@
 import numpy as np
 from glob import glob
 
-from features import texture, color_histogram
+from features import texture, chist
 from sklearn.linear_model import LogisticRegression
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import StandardScaler
@@ -32,7 +32,7 @@
 for fname in sorted(images):
     imc = mh.imread(fname)
     haralicks.append(texture(mh.colors.rgb2grey(imc)))
-    chists.append(color_histogram(imc))
+    chists.append(chist(imc))
 
     # Files are named like building00.jpg, scene23.jpg...
     labels.append(fname[:-len('xx.jpg')])
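The rename in PATCH 2/9 only touches import and call sites, but it is easier to follow when the corrected ch10 feature-extraction loop is read in one place. The sketch below assumes the chapter's ``features`` module exposes ``texture`` and ``chist`` exactly as in the diff; the glob pattern and the final ``np.hstack`` step are illustrative assumptions, not part of the patch::

    import numpy as np
    import mahotas as mh
    from glob import glob
    from features import texture, chist   # 'chist', not 'color_histogram'

    haralicks, chists = [], []
    for fname in sorted(glob('SimpleImageDataset/*.jpg')):   # assumed image folder
        imc = mh.imread(fname)
        haralicks.append(texture(mh.colors.rgb2grey(imc)))   # Haralick texture features
        chists.append(chist(imc))                             # color histogram features

    # Stack both descriptors into one feature matrix, one row per image
    features = np.hstack([np.array(chists), np.array(haralicks)])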
From c71430360ececfe2bbe7dbc79add793dab85e5d9 Mon Sep 17 00:00:00 2001
From: tomahawk28
Date: Mon, 2 Nov 2015 15:37:33 +0900
Subject: [PATCH 3/9] Fix predict method called

---
 ch02/figure4_5_no_sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ch02/figure4_5_no_sklearn.py b/ch02/figure4_5_no_sklearn.py
index 5f67e0d7..adc83d73 100644
--- a/ch02/figure4_5_no_sklearn.py
+++ b/ch02/figure4_5_no_sklearn.py
@@ -45,7 +45,7 @@ def plot_decision(features, labels):
 
     model = fit_model(1, features[:, (0, 2)], np.array(labels))
     C = predict(
-        np.vstack([X.ravel(), Y.ravel()]).T, model).reshape(X.shape)
+        model, np.vstack([X.ravel(), Y.ravel()]).T).reshape(X.shape)
     if COLOUR_FIGURE:
         cmap = ListedColormap([(1., .6, .6), (.6, 1., .6), (.6, .6, 1.)])
     else:

From b97a0be7a45ca977442f195b3be1dbc42c12621d Mon Sep 17 00:00:00 2001
From: pi
Date: Thu, 28 Apr 2016 17:00:16 +0800
Subject: [PATCH 4/9] format this file

---
 ch01/gen_webstats.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/ch01/gen_webstats.py b/ch01/gen_webstats.py
index fa133d76..61d0b738 100644
--- a/ch01/gen_webstats.py
+++ b/ch01/gen_webstats.py
@@ -17,26 +17,22 @@
 
 sp.random.seed(3)  # to reproduce the data later on
 
-x = sp.arange(1, 31 * 24)
-y = sp.array(200 * (sp.sin(2 * sp.pi * x / (7 * 24))), dtype=int)
+x = sp.arange(1, 31*24)
+y = sp.array(200*(sp.sin(2*sp.pi*x/(7*24))), dtype=int)
 y += gamma.rvs(15, loc=0, scale=100, size=len(x))
-y += 2 * sp.exp(x / 100.0)
-y = sp.ma.array(y, mask=[y < 0])
-print(sum(y), sum(y < 0))
+y += 2 * sp.exp(x/100.0)
+y = sp.ma.array(y, mask=[y<0])
+print(sum(y), sum(y<0))
 
 plt.scatter(x, y)
 plt.title("Web traffic over the last month")
 plt.xlabel("Time")
 plt.ylabel("Hits/hour")
-plt.xticks([w * 7 * 24 for w in [0, 1, 2, 3, 4]], ['week %i' % (w + 1) for w in
-           [0, 1, 2, 3, 4]])
-
+plt.xticks([w*7*24 for w in range(5)],
+           ['week %i' %(w+1) for w in range(5)])
 plt.autoscale(tight=True)
 plt.grid()
 plt.savefig(os.path.join(CHART_DIR, "1400_01_01.png"))
 
-# sp.savetxt(os.path.join("..", "web_traffic.tsv"),
-# zip(x[~y.mask],y[~y.mask]), delimiter="\t", fmt="%i")
-
-sp.savetxt(os.path.join(
-    DATA_DIR, "web_traffic.tsv"), list(zip(x, y)), delimiter="\t", fmt="%s")
+sp.savetxt(os.path.join(DATA_DIR, "web_traffic.tsv"),
+           list(zip(x, y)), delimiter="\t", fmt="%s")

From 98d66ea5b9775bda9efc25d0ce8710fb576f2be7 Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Thu, 21 Jul 2016 23:38:44 +0200
Subject: [PATCH 5/9] MIN Remove extraneous function call

---
 ch02/chapter.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ch02/chapter.py b/ch02/chapter.py
index ac887650..c68b45ab 100644
--- a/ch02/chapter.py
+++ b/ch02/chapter.py
@@ -100,7 +100,6 @@ def is_virginica_test(fi, t, reverse, example):
     training[ei] = False
     testing = ~training
     model = fit_model(features[training], is_virginica[training])
-    predict(model, features[testing])
     predictions = predict(model, features[testing])
     correct += np.sum(predictions == is_virginica[testing])
 acc = correct/float(len(features))

From c0a3b3a397d22c6941012c78d24c0fcfefe740fc Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Sun, 27 Nov 2016 11:05:39 +0100
Subject: [PATCH 6/9] DOC Make explicit how to get AP data

---
 ch04/README.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/ch04/README.rst b/ch04/README.rst
index 7fe0a92f..99a3c186 100644
--- a/ch04/README.rst
+++ b/ch04/README.rst
@@ -4,6 +4,16 @@ Chapter 4
 
 Support code for *Chapter 4: Topic Modeling*
 
+
+AP Data
+-------
+
+To download the AP data, use the ``download_ap.sh`` script inside the ``data``
+directory::
+
+    cd data
+    ./download_ap.sh
+
 Word cloud creation
 -------------------
 
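PATCH 3/9 above swaps the arguments so that the no-sklearn helper is called as ``predict(model, points)``. The call it fixes follows the usual decision-surface recipe: evaluate the classifier on a flattened mesh grid, then reshape the predictions back to the grid. A self-contained sketch of that pattern with a hypothetical stand-in model (the real ``fit_model``/``predict`` live in ch02 and are not reproduced here)::

    import numpy as np

    def predict(model, features):
        # Calling convention after the fix: model first, then the query points.
        # Here the "model" is just a hypothetical threshold on the first feature.
        return (features[:, 0] > model).astype(int)

    x = np.linspace(0., 10., 100)
    y = np.linspace(0., 5., 100)
    X, Y = np.meshgrid(x, y)
    grid = np.vstack([X.ravel(), Y.ravel()]).T   # one row per grid point
    C = predict(5.0, grid).reshape(X.shape)      # class label for every grid cell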
From c4c71a59644d6f9c80993aed3bae5eb6f453c00c Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Tue, 28 Mar 2017 18:47:48 +0200
Subject: [PATCH 7/9] MIN Update link to AP data

---
 ch04/data/download_ap.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ch04/data/download_ap.sh b/ch04/data/download_ap.sh
index 6de8ded8..da27814a 100755
--- a/ch04/data/download_ap.sh
+++ b/ch04/data/download_ap.sh
@@ -1,3 +1,3 @@
 #!/bin/sh
-wget http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
+wget http://www.cs.columbia.edu/~blei/lda-c/ap.tgz
 tar xzf ap.tgz

From a237d75979fe47d6505ea0c3bc731050503064e8 Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Sun, 25 Jun 2017 13:55:34 +0200
Subject: [PATCH 8/9] BUG Fixes API usage

For newer versions of gensim at least, topics are represented as `(word, frequency)`.

closes #21
---
 ch04/blei_lda.py  | 4 ++--
 ch04/wordcloud.py | 4 +---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/ch04/blei_lda.py b/ch04/blei_lda.py
index bbad9d1f..7f6ac2b3 100644
--- a/ch04/blei_lda.py
+++ b/ch04/blei_lda.py
@@ -36,9 +36,9 @@
 # Iterate over all the topics in the model
 for ti in range(model.num_topics):
     words = model.show_topic(ti, 64)
-    tf = sum(f for f, w in words)
+    tf = sum(f for _, f in words)
     with open('topics.txt', 'w') as output:
-        output.write('\n'.join('{}:{}'.format(w, int(1000. * f / tf)) for f, w in words))
+        output.write('\n'.join('{}:{}'.format(w, int(1000. * f / tf)) for w, f in words))
         output.write("\n\n\n")
 
 # We first identify the most discussed topic, i.e., the one with the

diff --git a/ch04/wordcloud.py b/ch04/wordcloud.py
index 6c5302ea..accca2d6 100644
--- a/ch04/wordcloud.py
+++ b/ch04/wordcloud.py
@@ -24,8 +24,6 @@ def create_cloud(oname, words,maxsize=120, fontname='Lobster'):
     # gensim returns a weight between 0 and 1 for each word, while pytagcloud
     # expects an integer word count. So, we multiply by a large number and
     # round. For a visualization this is an adequate approximation.
-    # We also need to flip the order as gensim returns (value, word), whilst
-    # pytagcloud expects (word, value):
-    words = [(w,int(v*10000)) for v,w in words]
+    words = [(w,int(v*10000)) for w,v in words]
     tags = make_tags(words, maxsize=maxsize)
     create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)

From 52891e6bac00213bf94ab1a3b1f2d8d5ed04a774 Mon Sep 17 00:00:00 2001
From: Luis Pedro Coelho
Date: Mon, 21 May 2018 15:45:33 +0200
Subject: [PATCH 9/9] BUG Fix function import

---
 ch12/image-classification.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ch12/image-classification.py b/ch12/image-classification.py
index 09dbd5b4..6f76d26d 100644
--- a/ch12/image-classification.py
+++ b/ch12/image-classification.py
@@ -39,7 +39,7 @@ def compute_texture(im):
 
 
 @TaskGenerator
 def chist(fname):
-    from features import color_histogram
+    from features import chist as color_histogram
     im = mh.imread(fname)
     return color_histogram(im)
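The gensim convention fixed in PATCH 8/9 is easy to get backwards: newer releases hand back ``(word, weight)`` pairs from ``show_topic``, and the chapter's script normalises the weights before writing them out. Below is a minimal sketch of that unpacking; the toy corpus and the choice of ``num_topics=2`` are assumptions for illustration, not the chapter's AP data or settings::

    from gensim import corpora, models

    # Toy documents, just enough to build a model with the same API calls
    docs = [['topic', 'modeling', 'with', 'lda'],
            ['lda', 'assigns', 'words', 'to', 'topics'],
            ['word', 'weights', 'sum', 'to', 'one', 'per', 'topic']]
    dictionary = corpora.Dictionary(docs)
    corpus = [dictionary.doc2bow(doc) for doc in docs]
    model = models.ldamodel.LdaModel(corpus, num_topics=2, id2word=dictionary)

    for ti in range(model.num_topics):
        words = model.show_topic(ti, 5)              # list of (word, weight) pairs
        total = sum(weight for _, weight in words)   # normalise as blei_lda.py does
        for word, weight in words:
            print('{}:{}'.format(word, int(1000. * weight / total)))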