diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..0d20b648
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/README.md b/README.md
index 05218427..cbcd3155 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,14 @@
Building Machine Learning Systems with Python
=============================================
-Source Code for the book Building Machine Learning Systems with Python by
-[Willi Richert](http://twotoreal.com) and [Luis Pedro
-Coelho](http://luispedro.org).
+Source Code for the book Building Machine Learning Systems with Python by [Luis
+Pedro Coelho](http://luispedro.org) and [Willi Richert](http://twotoreal.com).
-The book was published in 2013 by Packt Publishing and is available [from their
+The book was published in 2013 (second edition in 2015) by Packt Publishing and
+is available [from their
website](http://www.packtpub.com/building-machine-learning-systems-with-python/book).
+The code in this repository corresponds to the second edition. Code for the
+first edition is available in the [first\_edition
+branch](https://github.com/luispedro/BuildingMachineLearningSystemsWithPython/tree/first_edition).
diff --git a/SimpleImageDataset/building02.jpg b/SimpleImageDataset/building02.jpg
new file mode 100644
index 00000000..343c5242
Binary files /dev/null and b/SimpleImageDataset/building02.jpg differ
diff --git a/SimpleImageDataset/scene01.jpg b/SimpleImageDataset/scene01.jpg
index 29b327d2..e7b416d3 100644
Binary files a/SimpleImageDataset/scene01.jpg and b/SimpleImageDataset/scene01.jpg differ
diff --git a/SimpleImageDataset/scene02.jpg b/SimpleImageDataset/scene02.jpg
new file mode 100644
index 00000000..89fd6b27
Binary files /dev/null and b/SimpleImageDataset/scene02.jpg differ
diff --git a/SimpleImageDataset/scene08.jpg b/SimpleImageDataset/scene08.jpg
new file mode 100644
index 00000000..7dee8860
Binary files /dev/null and b/SimpleImageDataset/scene08.jpg differ
diff --git a/ch01/analyze_webstats.py b/ch01/analyze_webstats.py
index ccd8fe7c..5da892e2 100644
--- a/ch01/analyze_webstats.py
+++ b/ch01/analyze_webstats.py
@@ -6,13 +6,15 @@
# It is made available under the MIT License
import os
+from utils import DATA_DIR, CHART_DIR
import scipy as sp
import matplotlib.pyplot as plt
-data_dir = os.path.join(
- os.path.dirname(os.path.realpath(__file__)), "..", "data")
-data = sp.genfromtxt(os.path.join(data_dir, "web_traffic.tsv"), delimiter="\t")
+sp.random.seed(3)  # seed the RNG so the random train/test split below is reproducible
+
+data = sp.genfromtxt(os.path.join(DATA_DIR, "web_traffic.tsv"), delimiter="\t")
print(data[:10])
+print(data.shape)
# all examples will have three classes in this file
colors = ['g', 'k', 'b', 'm', 'r']
@@ -24,10 +26,11 @@
x = x[~sp.isnan(y)]
y = y[~sp.isnan(y)]
-# plot input data
-
def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
+    ''' plot input data (and, optionally, fitted models) '''
+
+ plt.figure(num=None, figsize=(8, 6))
plt.clf()
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last month")
@@ -56,22 +59,26 @@ def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
plt.savefig(fname)
# first look at the data
-plot_models(x, y, None, os.path.join("..", "1400_01_01.png"))
+plot_models(x, y, None, os.path.join(CHART_DIR, "1400_01_01.png"))
# create and plot models
-fp1, res, rank, sv, rcond = sp.polyfit(x, y, 1, full=True)
-print("Model parameters: %s" % fp1)
-print("Error of the model:", res)
+fp1, res1, rank1, sv1, rcond1 = sp.polyfit(x, y, 1, full=True)
+print("Model parameters of fp1: %s" % fp1)
+print("Error of the model of fp1:", res1)
f1 = sp.poly1d(fp1)
-f2 = sp.poly1d(sp.polyfit(x, y, 2))
+
+fp2, res2, rank2, sv2, rcond2 = sp.polyfit(x, y, 2, full=True)
+print("Model parameters of fp2: %s" % fp2)
+print("Error of the model of fp2:", res2)
+f2 = sp.poly1d(fp2)
f3 = sp.poly1d(sp.polyfit(x, y, 3))
f10 = sp.poly1d(sp.polyfit(x, y, 10))
f100 = sp.poly1d(sp.polyfit(x, y, 100))
-plot_models(x, y, [f1], os.path.join("..", "1400_01_02.png"))
-plot_models(x, y, [f1, f2], os.path.join("..", "1400_01_03.png"))
+plot_models(x, y, [f1], os.path.join(CHART_DIR, "1400_01_02.png"))
+plot_models(x, y, [f1, f2], os.path.join(CHART_DIR, "1400_01_03.png"))
plot_models(
- x, y, [f1, f2, f3, f10, f100], os.path.join("..", "1400_01_04.png"))
+ x, y, [f1, f2, f3, f10, f100], os.path.join(CHART_DIR, "1400_01_04.png"))
# fit and plot a model using the knowledge about inflection point
inflection = 3.5 * 7 * 24
@@ -83,7 +90,7 @@ def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
fa = sp.poly1d(sp.polyfit(xa, ya, 1))
fb = sp.poly1d(sp.polyfit(xb, yb, 1))
-plot_models(x, y, [fa, fb], os.path.join("..", "1400_01_05.png"))
+plot_models(x, y, [fa, fb], os.path.join(CHART_DIR, "1400_01_05.png"))
def error(f, x, y):
@@ -102,7 +109,8 @@ def error(f, x, y):
# extrapolating into the future
plot_models(
- x, y, [f1, f2, f3, f10, f100], os.path.join("..", "1400_01_06.png"),
+ x, y, [f1, f2, f3, f10, f100],
+ os.path.join(CHART_DIR, "1400_01_06.png"),
mx=sp.linspace(0 * 7 * 24, 6 * 7 * 24, 100),
ymax=10000, xmin=0 * 7 * 24)
@@ -118,7 +126,8 @@ def error(f, x, y):
print("Error d=%i: %f" % (f.order, error(f, xb, yb)))
plot_models(
- x, y, [fb1, fb2, fb3, fb10, fb100], os.path.join("..", "1400_01_07.png"),
+ x, y, [fb1, fb2, fb3, fb10, fb100],
+ os.path.join(CHART_DIR, "1400_01_07.png"),
mx=sp.linspace(0 * 7 * 24, 6 * 7 * 24, 100),
ymax=10000, xmin=0 * 7 * 24)
@@ -130,6 +139,8 @@ def error(f, x, y):
train = sorted(shuffled[split_idx:])
fbt1 = sp.poly1d(sp.polyfit(xb[train], yb[train], 1))
fbt2 = sp.poly1d(sp.polyfit(xb[train], yb[train], 2))
+print("fbt2(x)= \n%s" % fbt2)
+print("fbt2(x)-100,000= \n%s" % (fbt2-100000))
fbt3 = sp.poly1d(sp.polyfit(xb[train], yb[train], 3))
fbt10 = sp.poly1d(sp.polyfit(xb[train], yb[train], 10))
fbt100 = sp.poly1d(sp.polyfit(xb[train], yb[train], 100))
@@ -139,13 +150,13 @@ def error(f, x, y):
print("Error d=%i: %f" % (f.order, error(f, xb[test], yb[test])))
plot_models(
- x, y, [fbt1, fbt2, fbt3, fbt10, fbt100], os.path.join("..",
- "1400_01_08.png"),
+ x, y, [fbt1, fbt2, fbt3, fbt10, fbt100],
+ os.path.join(CHART_DIR, "1400_01_08.png"),
mx=sp.linspace(0 * 7 * 24, 6 * 7 * 24, 100),
ymax=10000, xmin=0 * 7 * 24)
from scipy.optimize import fsolve
print(fbt2)
print(fbt2 - 100000)
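+# fsolve finds a root of the polynomial fbt2(x) - 100,000, i.e. the hour at
+# which the fitted model reaches 100,000 hits/hour; dividing by (7 * 24)
+# converts that hour into weeks.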
-reached_max = fsolve(fbt2 - 100000, 800) / (7 * 24)
+reached_max = fsolve(fbt2 - 100000, x0=800) / (7 * 24)
print("100,000 hits/hour expected at week %f" % reached_max[0])
diff --git a/ch01/web_traffic.tsv b/ch01/data/web_traffic.tsv
similarity index 100%
rename from ch01/web_traffic.tsv
rename to ch01/data/web_traffic.tsv
diff --git a/ch01/gen_webstats.py b/ch01/gen_webstats.py
index 570f7fe6..61d0b738 100644
--- a/ch01/gen_webstats.py
+++ b/ch01/gen_webstats.py
@@ -13,30 +13,26 @@
from scipy.stats import gamma
import matplotlib.pyplot as plt
+from utils import DATA_DIR, CHART_DIR
+
sp.random.seed(3) # to reproduce the data later on
-x = sp.arange(1, 31 * 24)
-y = sp.array(200 * (sp.sin(2 * sp.pi * x / (7 * 24))), dtype=int)
+x = sp.arange(1, 31*24)
+y = sp.array(200*(sp.sin(2*sp.pi*x/(7*24))), dtype=int)
y += gamma.rvs(15, loc=0, scale=100, size=len(x))
-y += 2 * sp.exp(x / 100.0)
-y = sp.ma.array(y, mask=[y < 0])
-print(sum(y), sum(y < 0))
+y += 2 * sp.exp(x/100.0)
+y = sp.ma.array(y, mask=[y<0])
+print(sum(y), sum(y<0))
plt.scatter(x, y)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
-plt.xticks([w * 7 * 24 for w in [0, 1, 2, 3, 4]], ['week %i' % (w + 1) for w in [
- 0, 1, 2, 3, 4]])
-
+plt.xticks([w*7*24 for w in range(5)],
+ ['week %i' %(w+1) for w in range(5)])
plt.autoscale(tight=True)
plt.grid()
-plt.savefig(os.path.join("..", "1400_01_01.png"))
-
-data_dir = os.path.join(
- os.path.dirname(os.path.realpath(__file__)), "..", "data")
+plt.savefig(os.path.join(CHART_DIR, "1400_01_01.png"))
-# sp.savetxt(os.path.join("..", "web_traffic.tsv"),
-# zip(x[~y.mask],y[~y.mask]), delimiter="\t", fmt="%i")
-sp.savetxt(os.path.join(
- data_dir, "web_traffic.tsv"), list(zip(x, y)), delimiter="\t", fmt="%s")
+sp.savetxt(os.path.join(DATA_DIR, "web_traffic.tsv"),
+ list(zip(x, y)), delimiter="\t", fmt="%s")
diff --git a/ch01/performance_test.py b/ch01/performance_test.py
index 16f3792d..f2111732 100644
--- a/ch01/performance_test.py
+++ b/ch01/performance_test.py
@@ -8,7 +8,7 @@
import timeit
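+# Compare a pure-Python generator expression against the "naive" NumPy
+# version, which calls the Python built-in sum() on an array and therefore
+# iterates over it element by element.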
-normal_py_sec = timeit.timeit('sum(x*x for x in xrange(1000))',
+normal_py_sec = timeit.timeit('sum(x*x for x in range(1000))',
number=10000)
naive_np_sec = timeit.timeit('sum(na*na)',
setup="import numpy as np; na=np.arange(1000)",
diff --git a/ch01/utils.py b/ch01/utils.py
new file mode 100644
index 00000000..7b2ec21b
--- /dev/null
+++ b/ch01/utils.py
@@ -0,0 +1,19 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import os
+
+DATA_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "data")
+
+CHART_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "charts")
+
+for d in [DATA_DIR, CHART_DIR]:
+ if not os.path.exists(d):
+ os.mkdir(d)
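+
+# Example usage (as in analyze_webstats.py and gen_webstats.py):
+#   from utils import DATA_DIR, CHART_DIR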
+
diff --git a/ch02/README.rst b/ch02/README.rst
new file mode 100644
index 00000000..e2cb729a
--- /dev/null
+++ b/ch02/README.rst
@@ -0,0 +1,55 @@
+=========
+Chapter 2
+=========
+
+Support code for *Chapter 2: Learning How to Classify with Real-world
+Examples*. The directory ``data`` contains the seeds dataset, originally
+downloaded from https://archive.ics.uci.edu/ml/datasets/seeds
+
+chapter.py
+ The code as printed in the book.
+
+figure1.py
+ Figure 1 in the book: all 2-by-2 scatter plots
+
+figure2.py
+ Figure 2 in the book: threshold & decision area
+
+figure4_5_sklearn.py
+    Figures 4 and 5 in the book: kNN decision boundaries before and after
+    feature normalization. This also produces a version of the figure using
+    11 neighbors (not in the book), which shows that the result is smoother
+    and less sensitive to the exact position of each datapoint.
+
+figure4_5_no_sklearn.py
+ Alternative code for Figures 4 and 5 without using scikit-learn
+
+load.py
+ Code to load the seeds data
+
+simple_threshold.py
+ Code from the book: finds the first partition, between Setosa and the other classes.
+
+stump.py
+ Code from the book: finds the second partition, between Virginica and Versicolor.
+
+threshold.py
+ Functional implementation of a threshold classifier
+
+heldout.py
+    Evaluate the threshold model on held-out data
+
+seeds_knn_sklearn.py
+ Demonstrate cross-validation and feature normalization using scikit-learn
+
+seeds_threshold.py
+    Test the thresholding model on the seeds dataset (result mentioned in the book, but no code shown)
+
+seeds_knn_increasing_k.py
+ Test effect of increasing num_neighbors on accuracy.
+
+knn.py
+    Implementation of k-nearest neighbors without using scikit-learn.
+
+seeds_knn.py
+ Demonstrate cross-validation (without scikit-learn)
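+
+As a minimal sketch of how the pieces listed above fit together (using
+``load.py`` and the functional interface from ``threshold.py``; the
+every-10th train/test split is only illustrative)::
+
+    import numpy as np
+    from load import load_dataset
+    from threshold import fit_model, predict
+
+    features, labels = load_dataset('seeds')
+    labels = (labels == 'Canadian')  # binary task, as in seeds_threshold.py
+
+    training = np.ones(len(features), bool)
+    training[::10] = False  # hold out every 10th example for testing
+    testing = ~training
+
+    model = fit_model(features[training], labels[training])
+    preds = predict(model, features[testing])
+    print('Held-out accuracy: {0:.1%}'.format(np.mean(preds == labels[testing])))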
diff --git a/ch02/chapter.py b/ch02/chapter.py
new file mode 100644
index 00000000..c68b45ab
--- /dev/null
+++ b/ch02/chapter.py
@@ -0,0 +1,164 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+
+from matplotlib import pyplot as plt
+import numpy as np
+
+# We load the data with load_iris from sklearn
+from sklearn.datasets import load_iris
+data = load_iris()
+
+# load_iris returns an object with several fields
+features = data.data
+feature_names = data.feature_names
+target = data.target
+target_names = data.target_names
+
+for t in range(3):
+ if t == 0:
+ c = 'r'
+ marker = '>'
+ elif t == 1:
+ c = 'g'
+ marker = 'o'
+ elif t == 2:
+ c = 'b'
+ marker = 'x'
+ plt.scatter(features[target == t, 0],
+ features[target == t, 1],
+ marker=marker,
+ c=c)
+# We use NumPy fancy indexing to get an array of strings:
+labels = target_names[target]
+
+# The petal length is the feature at position 2
+plength = features[:, 2]
+
+# Build an array of booleans:
+is_setosa = (labels == 'setosa')
+
+# This is the important step:
+max_setosa = plength[is_setosa].max()
+min_non_setosa = plength[~is_setosa].min()
+print('Maximum of setosa: {0}.'.format(max_setosa))
+
+print('Minimum of others: {0}.'.format(min_non_setosa))
+
+# ~ is the boolean negation operator
+features = features[~is_setosa]
+labels = labels[~is_setosa]
+# Build a new target variable, is_virginica
+is_virginica = (labels == 'virginica')
+
+# Initialize best_acc to impossibly low value
+best_acc = -1.0
+for fi in range(features.shape[1]):
+ # We are going to test all possible thresholds
+ thresh = features[:,fi]
+ for t in thresh:
+
+ # Get the vector for feature `fi`
+ feature_i = features[:, fi]
+ # apply threshold `t`
+ pred = (feature_i > t)
+ acc = (pred == is_virginica).mean()
+ rev_acc = (pred == ~is_virginica).mean()
+ if rev_acc > acc:
+ reverse = True
+ acc = rev_acc
+ else:
+ reverse = False
+
+ if acc > best_acc:
+ best_acc = acc
+ best_fi = fi
+ best_t = t
+ best_reverse = reverse
+
+print(best_fi, best_t, best_reverse, best_acc)
+
+def is_virginica_test(fi, t, reverse, example):
+ 'Apply threshold model to a new example'
+ test = example[fi] > t
+ if reverse:
+ test = not test
+ return test
+from threshold import fit_model, predict
+
+# Training accuracy was 96.0%.
+# Testing accuracy was 90.0% (N = 50).
+correct = 0.0
+
+for ei in range(len(features)):
+ # select all but the one at position `ei`:
+ training = np.ones(len(features), bool)
+ training[ei] = False
+ testing = ~training
+ model = fit_model(features[training], is_virginica[training])
+ predictions = predict(model, features[testing])
+ correct += np.sum(predictions == is_virginica[testing])
+acc = correct/float(len(features))
+print('Accuracy: {0:.1%}'.format(acc))
+
+
+###########################################
+############## SEEDS DATASET ##############
+###########################################
+
+from load import load_dataset
+
+feature_names = [
+ 'area',
+ 'perimeter',
+ 'compactness',
+ 'length of kernel',
+ 'width of kernel',
+    'asymmetry coefficient',
+ 'length of kernel groove',
+]
+features, labels = load_dataset('seeds')
+
+
+
+from sklearn.neighbors import KNeighborsClassifier
+classifier = KNeighborsClassifier(n_neighbors=1)
+from sklearn.cross_validation import KFold
+
+kf = KFold(len(features), n_folds=5, shuffle=True)
+means = []
+for training,testing in kf:
+ # We learn a model for this fold with `fit` and then apply it to the
+ # testing data with `predict`:
+ classifier.fit(features[training], labels[training])
+ prediction = classifier.predict(features[testing])
+
+ # np.mean on an array of booleans returns fraction
+ # of correct decisions for this fold:
+ curmean = np.mean(prediction == labels[testing])
+ means.append(curmean)
+print('Mean accuracy: {:.1%}'.format(np.mean(means)))
+
+
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+
+classifier = KNeighborsClassifier(n_neighbors=1)
+classifier = Pipeline([('norm', StandardScaler()), ('knn', classifier)])
+
+means = []
+for training,testing in kf:
+ # We learn a model for this fold with `fit` and then apply it to the
+ # testing data with `predict`:
+ classifier.fit(features[training], labels[training])
+ prediction = classifier.predict(features[testing])
+
+ # np.mean on an array of booleans returns fraction
+ # of correct decisions for this fold:
+ curmean = np.mean(prediction == labels[testing])
+ means.append(curmean)
+print('Mean accuracy: {:.1%}'.format(np.mean(means)))
diff --git a/ch02/data/seeds.tsv b/ch02/data/seeds.tsv
new file mode 100644
index 00000000..239810cd
--- /dev/null
+++ b/ch02/data/seeds.tsv
@@ -0,0 +1,210 @@
+15.26 14.84 0.871 5.763 3.312 2.221 5.22 Kama
+14.88 14.57 0.8811 5.554 3.333 1.018 4.956 Kama
+14.29 14.09 0.905 5.291 3.337 2.699 4.825 Kama
+13.84 13.94 0.8955 5.324 3.379 2.259 4.805 Kama
+16.14 14.99 0.9034 5.658 3.562 1.355 5.175 Kama
+14.38 14.21 0.8951 5.386 3.312 2.462 4.956 Kama
+14.69 14.49 0.8799 5.563 3.259 3.586 5.219 Kama
+14.11 14.1 0.8911 5.42 3.302 2.7 5.0 Kama
+16.63 15.46 0.8747 6.053 3.465 2.04 5.877 Kama
+16.44 15.25 0.888 5.884 3.505 1.969 5.533 Kama
+15.26 14.85 0.8696 5.714 3.242 4.543 5.314 Kama
+14.03 14.16 0.8796 5.438 3.201 1.717 5.001 Kama
+13.89 14.02 0.888 5.439 3.199 3.986 4.738 Kama
+13.78 14.06 0.8759 5.479 3.156 3.136 4.872 Kama
+13.74 14.05 0.8744 5.482 3.114 2.932 4.825 Kama
+14.59 14.28 0.8993 5.351 3.333 4.185 4.781 Kama
+13.99 13.83 0.9183 5.119 3.383 5.234 4.781 Kama
+15.69 14.75 0.9058 5.527 3.514 1.599 5.046 Kama
+14.7 14.21 0.9153 5.205 3.466 1.767 4.649 Kama
+12.72 13.57 0.8686 5.226 3.049 4.102 4.914 Kama
+14.16 14.4 0.8584 5.658 3.129 3.072 5.176 Kama
+14.11 14.26 0.8722 5.52 3.168 2.688 5.219 Kama
+15.88 14.9 0.8988 5.618 3.507 0.7651 5.091 Kama
+12.08 13.23 0.8664 5.099 2.936 1.415 4.961 Kama
+15.01 14.76 0.8657 5.789 3.245 1.791 5.001 Kama
+16.19 15.16 0.8849 5.833 3.421 0.903 5.307 Kama
+13.02 13.76 0.8641 5.395 3.026 3.373 4.825 Kama
+12.74 13.67 0.8564 5.395 2.956 2.504 4.869 Kama
+14.11 14.18 0.882 5.541 3.221 2.754 5.038 Kama
+13.45 14.02 0.8604 5.516 3.065 3.531 5.097 Kama
+13.16 13.82 0.8662 5.454 2.975 0.8551 5.056 Kama
+15.49 14.94 0.8724 5.757 3.371 3.412 5.228 Kama
+14.09 14.41 0.8529 5.717 3.186 3.92 5.299 Kama
+13.94 14.17 0.8728 5.585 3.15 2.124 5.012 Kama
+15.05 14.68 0.8779 5.712 3.328 2.129 5.36 Kama
+16.12 15.0 0.9 5.709 3.485 2.27 5.443 Kama
+16.2 15.27 0.8734 5.826 3.464 2.823 5.527 Kama
+17.08 15.38 0.9079 5.832 3.683 2.956 5.484 Kama
+14.8 14.52 0.8823 5.656 3.288 3.112 5.309 Kama
+14.28 14.17 0.8944 5.397 3.298 6.685 5.001 Kama
+13.54 13.85 0.8871 5.348 3.156 2.587 5.178 Kama
+13.5 13.85 0.8852 5.351 3.158 2.249 5.176 Kama
+13.16 13.55 0.9009 5.138 3.201 2.461 4.783 Kama
+15.5 14.86 0.882 5.877 3.396 4.711 5.528 Kama
+15.11 14.54 0.8986 5.579 3.462 3.128 5.18 Kama
+13.8 14.04 0.8794 5.376 3.155 1.56 4.961 Kama
+15.36 14.76 0.8861 5.701 3.393 1.367 5.132 Kama
+14.99 14.56 0.8883 5.57 3.377 2.958 5.175 Kama
+14.79 14.52 0.8819 5.545 3.291 2.704 5.111 Kama
+14.86 14.67 0.8676 5.678 3.258 2.129 5.351 Kama
+14.43 14.4 0.8751 5.585 3.272 3.975 5.144 Kama
+15.78 14.91 0.8923 5.674 3.434 5.593 5.136 Kama
+14.49 14.61 0.8538 5.715 3.113 4.116 5.396 Kama
+14.33 14.28 0.8831 5.504 3.199 3.328 5.224 Kama
+14.52 14.6 0.8557 5.741 3.113 1.481 5.487 Kama
+15.03 14.77 0.8658 5.702 3.212 1.933 5.439 Kama
+14.46 14.35 0.8818 5.388 3.377 2.802 5.044 Kama
+14.92 14.43 0.9006 5.384 3.412 1.142 5.088 Kama
+15.38 14.77 0.8857 5.662 3.419 1.999 5.222 Kama
+12.11 13.47 0.8392 5.159 3.032 1.502 4.519 Kama
+11.42 12.86 0.8683 5.008 2.85 2.7 4.607 Kama
+11.23 12.63 0.884 4.902 2.879 2.269 4.703 Kama
+12.36 13.19 0.8923 5.076 3.042 3.22 4.605 Kama
+13.22 13.84 0.868 5.395 3.07 4.157 5.088 Kama
+12.78 13.57 0.8716 5.262 3.026 1.176 4.782 Kama
+12.88 13.5 0.8879 5.139 3.119 2.352 4.607 Kama
+14.34 14.37 0.8726 5.63 3.19 1.313 5.15 Kama
+14.01 14.29 0.8625 5.609 3.158 2.217 5.132 Kama
+14.37 14.39 0.8726 5.569 3.153 1.464 5.3 Kama
+12.73 13.75 0.8458 5.412 2.882 3.533 5.067 Kama
+17.63 15.98 0.8673 6.191 3.561 4.076 6.06 Rosa
+16.84 15.67 0.8623 5.998 3.484 4.675 5.877 Rosa
+17.26 15.73 0.8763 5.978 3.594 4.539 5.791 Rosa
+19.11 16.26 0.9081 6.154 3.93 2.936 6.079 Rosa
+16.82 15.51 0.8786 6.017 3.486 4.004 5.841 Rosa
+16.77 15.62 0.8638 5.927 3.438 4.92 5.795 Rosa
+17.32 15.91 0.8599 6.064 3.403 3.824 5.922 Rosa
+20.71 17.23 0.8763 6.579 3.814 4.451 6.451 Rosa
+18.94 16.49 0.875 6.445 3.639 5.064 6.362 Rosa
+17.12 15.55 0.8892 5.85 3.566 2.858 5.746 Rosa
+16.53 15.34 0.8823 5.875 3.467 5.532 5.88 Rosa
+18.72 16.19 0.8977 6.006 3.857 5.324 5.879 Rosa
+20.2 16.89 0.8894 6.285 3.864 5.173 6.187 Rosa
+19.57 16.74 0.8779 6.384 3.772 1.472 6.273 Rosa
+19.51 16.71 0.878 6.366 3.801 2.962 6.185 Rosa
+18.27 16.09 0.887 6.173 3.651 2.443 6.197 Rosa
+18.88 16.26 0.8969 6.084 3.764 1.649 6.109 Rosa
+18.98 16.66 0.859 6.549 3.67 3.691 6.498 Rosa
+21.18 17.21 0.8989 6.573 4.033 5.78 6.231 Rosa
+20.88 17.05 0.9031 6.45 4.032 5.016 6.321 Rosa
+20.1 16.99 0.8746 6.581 3.785 1.955 6.449 Rosa
+18.76 16.2 0.8984 6.172 3.796 3.12 6.053 Rosa
+18.81 16.29 0.8906 6.272 3.693 3.237 6.053 Rosa
+18.59 16.05 0.9066 6.037 3.86 6.001 5.877 Rosa
+18.36 16.52 0.8452 6.666 3.485 4.933 6.448 Rosa
+16.87 15.65 0.8648 6.139 3.463 3.696 5.967 Rosa
+19.31 16.59 0.8815 6.341 3.81 3.477 6.238 Rosa
+18.98 16.57 0.8687 6.449 3.552 2.144 6.453 Rosa
+18.17 16.26 0.8637 6.271 3.512 2.853 6.273 Rosa
+18.72 16.34 0.881 6.219 3.684 2.188 6.097 Rosa
+16.41 15.25 0.8866 5.718 3.525 4.217 5.618 Rosa
+17.99 15.86 0.8992 5.89 3.694 2.068 5.837 Rosa
+19.46 16.5 0.8985 6.113 3.892 4.308 6.009 Rosa
+19.18 16.63 0.8717 6.369 3.681 3.357 6.229 Rosa
+18.95 16.42 0.8829 6.248 3.755 3.368 6.148 Rosa
+18.83 16.29 0.8917 6.037 3.786 2.553 5.879 Rosa
+18.85 16.17 0.9056 6.152 3.806 2.843 6.2 Rosa
+17.63 15.86 0.88 6.033 3.573 3.747 5.929 Rosa
+19.94 16.92 0.8752 6.675 3.763 3.252 6.55 Rosa
+18.55 16.22 0.8865 6.153 3.674 1.738 5.894 Rosa
+18.45 16.12 0.8921 6.107 3.769 2.235 5.794 Rosa
+19.38 16.72 0.8716 6.303 3.791 3.678 5.965 Rosa
+19.13 16.31 0.9035 6.183 3.902 2.109 5.924 Rosa
+19.14 16.61 0.8722 6.259 3.737 6.682 6.053 Rosa
+20.97 17.25 0.8859 6.563 3.991 4.677 6.316 Rosa
+19.06 16.45 0.8854 6.416 3.719 2.248 6.163 Rosa
+18.96 16.2 0.9077 6.051 3.897 4.334 5.75 Rosa
+19.15 16.45 0.889 6.245 3.815 3.084 6.185 Rosa
+18.89 16.23 0.9008 6.227 3.769 3.639 5.966 Rosa
+20.03 16.9 0.8811 6.493 3.857 3.063 6.32 Rosa
+20.24 16.91 0.8897 6.315 3.962 5.901 6.188 Rosa
+18.14 16.12 0.8772 6.059 3.563 3.619 6.011 Rosa
+16.17 15.38 0.8588 5.762 3.387 4.286 5.703 Rosa
+18.43 15.97 0.9077 5.98 3.771 2.984 5.905 Rosa
+15.99 14.89 0.9064 5.363 3.582 3.336 5.144 Rosa
+18.75 16.18 0.8999 6.111 3.869 4.188 5.992 Rosa
+18.65 16.41 0.8698 6.285 3.594 4.391 6.102 Rosa
+17.98 15.85 0.8993 5.979 3.687 2.257 5.919 Rosa
+20.16 17.03 0.8735 6.513 3.773 1.91 6.185 Rosa
+17.55 15.66 0.8991 5.791 3.69 5.366 5.661 Rosa
+18.3 15.89 0.9108 5.979 3.755 2.837 5.962 Rosa
+18.94 16.32 0.8942 6.144 3.825 2.908 5.949 Rosa
+15.38 14.9 0.8706 5.884 3.268 4.462 5.795 Rosa
+16.16 15.33 0.8644 5.845 3.395 4.266 5.795 Rosa
+15.56 14.89 0.8823 5.776 3.408 4.972 5.847 Rosa
+15.38 14.66 0.899 5.477 3.465 3.6 5.439 Rosa
+17.36 15.76 0.8785 6.145 3.574 3.526 5.971 Rosa
+15.57 15.15 0.8527 5.92 3.231 2.64 5.879 Rosa
+15.6 15.11 0.858 5.832 3.286 2.725 5.752 Rosa
+16.23 15.18 0.885 5.872 3.472 3.769 5.922 Rosa
+13.07 13.92 0.848 5.472 2.994 5.304 5.395 Canadian
+13.32 13.94 0.8613 5.541 3.073 7.035 5.44 Canadian
+13.34 13.95 0.862 5.389 3.074 5.995 5.307 Canadian
+12.22 13.32 0.8652 5.224 2.967 5.469 5.221 Canadian
+11.82 13.4 0.8274 5.314 2.777 4.471 5.178 Canadian
+11.21 13.13 0.8167 5.279 2.687 6.169 5.275 Canadian
+11.43 13.13 0.8335 5.176 2.719 2.221 5.132 Canadian
+12.49 13.46 0.8658 5.267 2.967 4.421 5.002 Canadian
+12.7 13.71 0.8491 5.386 2.911 3.26 5.316 Canadian
+10.79 12.93 0.8107 5.317 2.648 5.462 5.194 Canadian
+11.83 13.23 0.8496 5.263 2.84 5.195 5.307 Canadian
+12.01 13.52 0.8249 5.405 2.776 6.992 5.27 Canadian
+12.26 13.6 0.8333 5.408 2.833 4.756 5.36 Canadian
+11.18 13.04 0.8266 5.22 2.693 3.332 5.001 Canadian
+11.36 13.05 0.8382 5.175 2.755 4.048 5.263 Canadian
+11.19 13.05 0.8253 5.25 2.675 5.813 5.219 Canadian
+11.34 12.87 0.8596 5.053 2.849 3.347 5.003 Canadian
+12.13 13.73 0.8081 5.394 2.745 4.825 5.22 Canadian
+11.75 13.52 0.8082 5.444 2.678 4.378 5.31 Canadian
+11.49 13.22 0.8263 5.304 2.695 5.388 5.31 Canadian
+12.54 13.67 0.8425 5.451 2.879 3.082 5.491 Canadian
+12.02 13.33 0.8503 5.35 2.81 4.271 5.308 Canadian
+12.05 13.41 0.8416 5.267 2.847 4.988 5.046 Canadian
+12.55 13.57 0.8558 5.333 2.968 4.419 5.176 Canadian
+11.14 12.79 0.8558 5.011 2.794 6.388 5.049 Canadian
+12.1 13.15 0.8793 5.105 2.941 2.201 5.056 Canadian
+12.44 13.59 0.8462 5.319 2.897 4.924 5.27 Canadian
+12.15 13.45 0.8443 5.417 2.837 3.638 5.338 Canadian
+11.35 13.12 0.8291 5.176 2.668 4.337 5.132 Canadian
+11.24 13.0 0.8359 5.09 2.715 3.521 5.088 Canadian
+11.02 13.0 0.8189 5.325 2.701 6.735 5.163 Canadian
+11.55 13.1 0.8455 5.167 2.845 6.715 4.956 Canadian
+11.27 12.97 0.8419 5.088 2.763 4.309 5.0 Canadian
+11.4 13.08 0.8375 5.136 2.763 5.588 5.089 Canadian
+10.83 12.96 0.8099 5.278 2.641 5.182 5.185 Canadian
+10.8 12.57 0.859 4.981 2.821 4.773 5.063 Canadian
+11.26 13.01 0.8355 5.186 2.71 5.335 5.092 Canadian
+10.74 12.73 0.8329 5.145 2.642 4.702 4.963 Canadian
+11.48 13.05 0.8473 5.18 2.758 5.876 5.002 Canadian
+12.21 13.47 0.8453 5.357 2.893 1.661 5.178 Canadian
+11.41 12.95 0.856 5.09 2.775 4.957 4.825 Canadian
+12.46 13.41 0.8706 5.236 3.017 4.987 5.147 Canadian
+12.19 13.36 0.8579 5.24 2.909 4.857 5.158 Canadian
+11.65 13.07 0.8575 5.108 2.85 5.209 5.135 Canadian
+12.89 13.77 0.8541 5.495 3.026 6.185 5.316 Canadian
+11.56 13.31 0.8198 5.363 2.683 4.062 5.182 Canadian
+11.81 13.45 0.8198 5.413 2.716 4.898 5.352 Canadian
+10.91 12.8 0.8372 5.088 2.675 4.179 4.956 Canadian
+11.23 12.82 0.8594 5.089 2.821 7.524 4.957 Canadian
+10.59 12.41 0.8648 4.899 2.787 4.975 4.794 Canadian
+10.93 12.8 0.839 5.046 2.717 5.398 5.045 Canadian
+11.27 12.86 0.8563 5.091 2.804 3.985 5.001 Canadian
+11.87 13.02 0.8795 5.132 2.953 3.597 5.132 Canadian
+10.82 12.83 0.8256 5.18 2.63 4.853 5.089 Canadian
+12.11 13.27 0.8639 5.236 2.975 4.132 5.012 Canadian
+12.8 13.47 0.886 5.16 3.126 4.873 4.914 Canadian
+12.79 13.53 0.8786 5.224 3.054 5.483 4.958 Canadian
+13.37 13.78 0.8849 5.32 3.128 4.67 5.091 Canadian
+12.62 13.67 0.8481 5.41 2.911 3.306 5.231 Canadian
+12.76 13.38 0.8964 5.073 3.155 2.828 4.83 Canadian
+12.38 13.44 0.8609 5.219 2.989 5.472 5.045 Canadian
+12.67 13.32 0.8977 4.984 3.135 2.3 4.745 Canadian
+11.18 12.72 0.868 5.009 2.81 4.051 4.828 Canadian
+12.7 13.41 0.8874 5.183 3.091 8.456 5.0 Canadian
+12.37 13.47 0.8567 5.204 2.96 3.919 5.001 Canadian
+12.19 13.2 0.8783 5.137 2.981 3.631 4.87 Canadian
+11.23 12.88 0.8511 5.14 2.795 4.325 5.003 Canadian
+13.2 13.66 0.8883 5.236 3.232 8.315 5.056 Canadian
+11.84 13.21 0.8521 5.175 2.836 3.598 5.044 Canadian
+12.3 13.34 0.8684 5.243 2.974 5.637 5.063 Canadian
diff --git a/ch02/extra/create_tsv.py b/ch02/extra/create_tsv.py
index b0ddee89..e6d7b4fd 100644
--- a/ch02/extra/create_tsv.py
+++ b/ch02/extra/create_tsv.py
@@ -5,7 +5,6 @@
#
# It is made available under the MIT License
-import milksets.iris
import milksets.seeds
@@ -16,5 +15,4 @@ def save_as_tsv(fname, module):
for f, n in zip(features, nlabels):
print >>ofile, "\t".join(map(str, f) + [n])
-save_as_tsv('iris.tsv', milksets.iris)
save_as_tsv('seeds.tsv', milksets.seeds)
diff --git a/ch02/figure1.py b/ch02/figure1.py
index 7b7d1182..4ec6fff8 100644
--- a/ch02/figure1.py
+++ b/ch02/figure1.py
@@ -5,24 +5,38 @@
#
# It is made available under the MIT License
-import numpy as np
-from sklearn.datasets import load_iris
from matplotlib import pyplot as plt
-data = load_iris()
-features = data['data']
-feature_names = data['feature_names']
-target = data['target']
+# We load the data with load_iris from sklearn
+from sklearn.datasets import load_iris
+# load_iris returns an object with several fields
+data = load_iris()
+features = data.data
+feature_names = data.feature_names
+target = data.target
+target_names = data.target_names
+fig,axes = plt.subplots(2, 3)
pairs = [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
+
+# Set up 3 different pairs of (color, marker)
+color_markers = [
+ ('r', '>'),
+ ('g', 'o'),
+ ('b', 'x'),
+ ]
for i, (p0, p1) in enumerate(pairs):
- plt.subplot(2, 3, i + 1)
- for t, marker, c in zip(range(3), ">ox", "rgb"):
- plt.scatter(features[target == t, p0], features[
+ ax = axes.flat[i]
+
+ for t in range(3):
+ # Use a different color/marker for each class `t`
+ c,marker = color_markers[t]
+ ax.scatter(features[target == t, p0], features[
target == t, p1], marker=marker, c=c)
- plt.xlabel(feature_names[p0])
- plt.ylabel(feature_names[p1])
- plt.xticks([])
- plt.yticks([])
-plt.savefig('../1400_02_01.png')
+ ax.set_xlabel(feature_names[p0])
+ ax.set_ylabel(feature_names[p1])
+ ax.set_xticks([])
+ ax.set_yticks([])
+fig.tight_layout()
+fig.savefig('figure1.png')
diff --git a/ch02/figure2.py b/ch02/figure2.py
index 1ed0d021..0b69d395 100644
--- a/ch02/figure2.py
+++ b/ch02/figure2.py
@@ -10,17 +10,25 @@
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
data = load_iris()
-features = data['data']
-feature_names = data['feature_names']
-species = data['target_names'][data['target']]
+features = data.data
+feature_names = data.feature_names
+target = data.target
+target_names = data.target_names
-setosa = (species == 'setosa')
-features = features[~setosa]
-species = species[~setosa]
-virginica = species == 'virginica'
+# We use NumPy fancy indexing to get an array of strings:
+labels = target_names[target]
-t = 1.75
-p0, p1 = 3, 2
+is_setosa = (labels == 'setosa')
+features = features[~is_setosa]
+labels = labels[~is_setosa]
+is_virginica = (labels == 'virginica')
+
+# Hand fixed thresholds:
+t = 1.65
+t2 = 1.75
+
+# Features to use: 3 & 2
+f0, f1 = 3, 2
if COLOUR_FIGURE:
area1c = (1., .8, .8)
@@ -29,19 +37,27 @@
area1c = (1., 1, 1)
area2c = (.7, .7, .7)
-x0, x1 = [features[:, p0].min() * .9, features[:, p0].max() * 1.1]
-y0, y1 = [features[:, p1].min() * .9, features[:, p1].max() * 1.1]
-
-plt.fill_between([t, x1], [y0, y0], [y1, y1], color=area2c)
-plt.fill_between([x0, t], [y0, y0], [y1, y1], color=area1c)
-plt.plot([t, t], [y0, y1], 'k--', lw=2)
-plt.plot([t - .1, t - .1], [y0, y1], 'k:', lw=2)
-plt.scatter(features[virginica, p0],
- features[virginica, p1], c='b', marker='o')
-plt.scatter(features[~virginica, p0],
- features[~virginica, p1], c='r', marker='x')
-plt.ylim(y0, y1)
-plt.xlim(x0, x1)
-plt.xlabel(feature_names[p0])
-plt.ylabel(feature_names[p1])
-plt.savefig('../1400_02_02.png')
+# Plot from 90% of smallest value to 110% of largest value
+# (all feature values are positive, otherwise this would not work very well)
+
+x0 = features[:, f0].min() * .9
+x1 = features[:, f0].max() * 1.1
+
+y0 = features[:, f1].min() * .9
+y1 = features[:, f1].max() * 1.1
+
+fig,ax = plt.subplots()
+ax.fill_between([t, x1], [y0, y0], [y1, y1], color=area2c)
+ax.fill_between([x0, t], [y0, y0], [y1, y1], color=area1c)
+ax.plot([t, t], [y0, y1], 'k--', lw=2)
+ax.plot([t2, t2], [y0, y1], 'k:', lw=2)
+ax.scatter(features[is_virginica, f0],
+ features[is_virginica, f1], c='b', marker='o', s=40)
+ax.scatter(features[~is_virginica, f0],
+ features[~is_virginica, f1], c='r', marker='x', s=40)
+ax.set_ylim(y0, y1)
+ax.set_xlim(x0, x1)
+ax.set_xlabel(feature_names[f0])
+ax.set_ylabel(feature_names[f1])
+fig.tight_layout()
+fig.savefig('figure2.png')
diff --git a/ch02/figure4_5.py b/ch02/figure4_5_no_sklearn.py
similarity index 60%
rename from ch02/figure4_5.py
rename to ch02/figure4_5_no_sklearn.py
index a32001eb..adc83d73 100644
--- a/ch02/figure4_5.py
+++ b/ch02/figure4_5_no_sklearn.py
@@ -11,7 +11,7 @@
from matplotlib.colors import ListedColormap
from load import load_dataset
import numpy as np
-from knn import learn_model, apply_model, accuracy
+from knn import fit_model, predict
feature_names = [
'area',
@@ -24,42 +24,56 @@
]
-def train_plot(features, labels):
+def plot_decision(features, labels):
+ '''Plots decision boundary for KNN
+
+ Parameters
+ ----------
+ features : ndarray
+ labels : sequence
+
+ Returns
+ -------
+ fig : Matplotlib Figure
+ ax : Matplotlib Axes
+ '''
y0, y1 = features[:, 2].min() * .9, features[:, 2].max() * 1.1
x0, x1 = features[:, 0].min() * .9, features[:, 0].max() * 1.1
X = np.linspace(x0, x1, 100)
Y = np.linspace(y0, y1, 100)
X, Y = np.meshgrid(X, Y)
- model = learn_model(1, features[:, (0, 2)], np.array(labels))
- C = apply_model(
- np.vstack([X.ravel(), Y.ravel()]).T, model).reshape(X.shape)
+ model = fit_model(1, features[:, (0, 2)], np.array(labels))
+ C = predict(
+ model, np.vstack([X.ravel(), Y.ravel()]).T).reshape(X.shape)
if COLOUR_FIGURE:
cmap = ListedColormap([(1., .6, .6), (.6, 1., .6), (.6, .6, 1.)])
else:
cmap = ListedColormap([(1., 1., 1.), (.2, .2, .2), (.6, .6, .6)])
- plt.xlim(x0, x1)
- plt.ylim(y0, y1)
- plt.xlabel(feature_names[0])
- plt.ylabel(feature_names[2])
- plt.pcolormesh(X, Y, C, cmap=cmap)
+ fig,ax = plt.subplots()
+ ax.set_xlim(x0, x1)
+ ax.set_ylim(y0, y1)
+ ax.set_xlabel(feature_names[0])
+ ax.set_ylabel(feature_names[2])
+ ax.pcolormesh(X, Y, C, cmap=cmap)
if COLOUR_FIGURE:
cmap = ListedColormap([(1., .0, .0), (.0, 1., .0), (.0, .0, 1.)])
- plt.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
+ ax.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
else:
for lab, ma in zip(range(3), "Do^"):
- plt.plot(features[labels == lab, 0], features[
+ ax.plot(features[labels == lab, 0], features[
labels == lab, 2], ma, c=(1., 1., 1.))
+ return fig,ax
features, labels = load_dataset('seeds')
names = sorted(set(labels))
labels = np.array([names.index(ell) for ell in labels])
-train_plot(features, labels)
-plt.savefig('../1400_02_04.png')
+fig,ax = plot_decision(features, labels)
+fig.savefig('figure4.png')
features -= features.mean(0)
features /= features.std(0)
-train_plot(features, labels)
-plt.savefig('../1400_02_05.png')
+fig,ax = plot_decision(features, labels)
+fig.savefig('figure5.png')
diff --git a/ch02/figure4_5_sklearn.py b/ch02/figure4_5_sklearn.py
new file mode 100644
index 00000000..55ac0c80
--- /dev/null
+++ b/ch02/figure4_5_sklearn.py
@@ -0,0 +1,85 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+COLOUR_FIGURE = False
+
+from matplotlib import pyplot as plt
+from matplotlib.colors import ListedColormap
+from load import load_dataset
+import numpy as np
+from sklearn.neighbors import KNeighborsClassifier
+
+feature_names = [
+ 'area',
+ 'perimeter',
+ 'compactness',
+ 'length of kernel',
+ 'width of kernel',
+    'asymmetry coefficient',
+ 'length of kernel groove',
+]
+
+
+def plot_decision(features, labels, num_neighbors=1):
+ '''Plots decision boundary for KNN
+
+ Parameters
+ ----------
+ features : ndarray
+ labels : sequence
+
+ Returns
+ -------
+ fig : Matplotlib Figure
+ ax : Matplotlib Axes
+ '''
+ y0, y1 = features[:, 2].min() * .9, features[:, 2].max() * 1.1
+ x0, x1 = features[:, 0].min() * .9, features[:, 0].max() * 1.1
+ X = np.linspace(x0, x1, 1000)
+ Y = np.linspace(y0, y1, 1000)
+ X, Y = np.meshgrid(X, Y)
+
+ model = KNeighborsClassifier(num_neighbors)
+ model.fit(features[:, (0,2)], labels)
+ C = model.predict(np.vstack([X.ravel(), Y.ravel()]).T).reshape(X.shape)
+ if COLOUR_FIGURE:
+ cmap = ListedColormap([(1., .7, .7), (.7, 1., .7), (.7, .7, 1.)])
+ else:
+ cmap = ListedColormap([(1., 1., 1.), (.2, .2, .2), (.6, .6, .6)])
+ fig,ax = plt.subplots()
+ ax.set_xlim(x0, x1)
+ ax.set_ylim(y0, y1)
+ ax.set_xlabel(feature_names[0])
+ ax.set_ylabel(feature_names[2])
+ ax.pcolormesh(X, Y, C, cmap=cmap)
+ if COLOUR_FIGURE:
+ cmap = ListedColormap([(1., .0, .0), (.1, .6, .1), (.0, .0, 1.)])
+ ax.scatter(features[:, 0], features[:, 2], c=labels, cmap=cmap)
+ else:
+ for lab, ma in zip(range(3), "Do^"):
+ ax.plot(features[labels == lab, 0], features[
+ labels == lab, 2], ma, c=(1., 1., 1.), ms=6)
+ return fig,ax
+
+
+features, labels = load_dataset('seeds')
+names = sorted(set(labels))
+labels = np.array([names.index(ell) for ell in labels])
+
+fig,ax = plot_decision(features, labels)
+fig.tight_layout()
+fig.savefig('figure4sklearn.png')
+
+features -= features.mean(0)
+features /= features.std(0)
+fig,ax = plot_decision(features, labels)
+fig.tight_layout()
+fig.savefig('figure5sklearn.png')
+
+fig,ax = plot_decision(features, labels, 11)
+fig.tight_layout()
+fig.savefig('figure5sklearn_with_11_neighbors.png')
diff --git a/ch02/heldout.py b/ch02/heldout.py
index 6d1ba9b4..e381e706 100644
--- a/ch02/heldout.py
+++ b/ch02/heldout.py
@@ -5,29 +5,37 @@
#
# It is made available under the MIT License
-from matplotlib import pyplot as plt
+# This script demonstrates the difference between the training accuracy and
+# testing (held-out) accuracy.
+
import numpy as np
from sklearn.datasets import load_iris
-from threshold import learn_model, apply_model, accuracy
+from threshold import fit_model, accuracy
data = load_iris()
features = data['data']
labels = data['target_names'][data['target']]
+# We are going to remove the setosa examples as they are too easy:
+is_setosa = (labels == 'setosa')
+features = features[~is_setosa]
+labels = labels[~is_setosa]
+
+# Now we classify virginica vs non-virginica
+is_virginica = (labels == 'virginica')
-setosa = (labels == 'setosa')
-features = features[~setosa]
-labels = labels[~setosa]
-virginica = (labels == 'virginica')
+# Split the data in two: testing and training
+testing = np.tile([True, False], 50) # testing = [True,False,True,False,True,False...]
-testing = np.tile([True, False], 50)
+# Training is the negation of testing: i.e., datapoints not used for testing
+# will be used for training.
training = ~testing
-model = learn_model(features[training], virginica[training])
-train_error = accuracy(features[training], virginica[training], model)
-test_error = accuracy(features[testing], virginica[testing], model)
+model = fit_model(features[training], is_virginica[training])
+train_accuracy = accuracy(features[training], is_virginica[training], model)
+test_accuracy = accuracy(features[testing], is_virginica[testing], model)
print('''\
-Training error was {0:.1%}.
-Testing error was {1:.1%} (N = {2}).
-'''.format(train_error, test_error, testing.sum()))
+Training accuracy was {0:.1%}.
+Testing accuracy was {1:.1%} (N = {2}).
+'''.format(train_accuracy, test_accuracy, testing.sum()))
diff --git a/ch02/knn.py b/ch02/knn.py
index 75519766..89ebfdb4 100644
--- a/ch02/knn.py
+++ b/ch02/knn.py
@@ -7,12 +7,15 @@
import numpy as np
-
-def learn_model(k, features, labels):
+# This function was called ``learn_model`` in the first edition
+def fit_model(k, features, labels):
+ '''Learn a k-nn model'''
+ # There is no model in k-nn, just a copy of the inputs
return k, features.copy(), labels.copy()
def plurality(xs):
+ '''Find the most common element in a collection'''
from collections import defaultdict
counts = defaultdict(int)
for x in xs:
@@ -22,12 +25,14 @@ def plurality(xs):
if v == maxv:
return k
-
-def apply_model(features, model):
+# This function was called ``apply_model`` in the first edition
+def predict(model, features):
+ '''Apply k-nn model'''
k, train_feats, labels = model
results = []
for f in features:
label_dist = []
+ # Compute all distances:
for t, ell in zip(train_feats, labels):
label_dist.append((np.linalg.norm(f - t), ell))
label_dist.sort(key=lambda d_ell: d_ell[0])
@@ -37,5 +42,5 @@ def apply_model(features, model):
def accuracy(features, labels, model):
- preds = apply_model(features, model)
+ preds = predict(model, features)
return np.mean(preds == labels)
diff --git a/ch02/load.py b/ch02/load.py
index 8950faa3..e508a682 100644
--- a/ch02/load.py
+++ b/ch02/load.py
@@ -21,7 +21,7 @@ def load_dataset(dataset_name):
'''
data = []
labels = []
- with open('../data/{0}.tsv'.format(dataset_name)) as ifile:
+ with open('./data/{0}.tsv'.format(dataset_name)) as ifile:
for line in ifile:
tokens = line.strip().split('\t')
data.append([float(tk) for tk in tokens[:-1]])
diff --git a/ch02/seeds_knn.py b/ch02/seeds_knn.py
index 71dab0ca..c18d9592 100644
--- a/ch02/seeds_knn.py
+++ b/ch02/seeds_knn.py
@@ -7,18 +7,19 @@
from load import load_dataset
import numpy as np
-from knn import learn_model, apply_model, accuracy
+from knn import fit_model, accuracy
features, labels = load_dataset('seeds')
def cross_validate(features, labels):
+ '''Compute cross-validation errors'''
error = 0.0
for fold in range(10):
training = np.ones(len(features), bool)
training[fold::10] = 0
testing = ~training
- model = learn_model(1, features[training], labels[training])
+ model = fit_model(1, features[training], labels[training])
test_error = accuracy(features[testing], labels[testing], model)
error += test_error
@@ -27,6 +28,7 @@ def cross_validate(features, labels):
error = cross_validate(features, labels)
print('Ten fold cross-validated error was {0:.1%}.'.format(error))
+# Z-score (whiten) the features
features -= features.mean(0)
features /= features.std(0)
error = cross_validate(features, labels)
diff --git a/ch02/seeds_knn_increasing_k.py b/ch02/seeds_knn_increasing_k.py
new file mode 100644
index 00000000..7cd8b3f9
--- /dev/null
+++ b/ch02/seeds_knn_increasing_k.py
@@ -0,0 +1,48 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+# Basic imports
+from __future__ import print_function
+import numpy as np
+from matplotlib import pyplot as plt
+from load import load_dataset
+
+
+from sklearn.neighbors import KNeighborsClassifier
+
+from sklearn.cross_validation import cross_val_score
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+
+
+features, labels = load_dataset('seeds')
+
+# Values of k to consider: all in 1 .. 160
+ks = np.arange(1,161)
+
+# We build a classifier object here with the default number of neighbors
+# (It happens to be 5, but it does not matter, as we will be changing it below)
+classifier = KNeighborsClassifier()
+classifier = Pipeline([('norm', StandardScaler()), ('knn', classifier)])
+
+# accuracies will hold our results
+accuracies = []
+for k in ks:
+ # set the classifier parameter
+ classifier.set_params(knn__n_neighbors=k)
+ crossed = cross_val_score(classifier, features, labels)
+
+ # Save only the average
+ accuracies.append(crossed.mean())
+
+accuracies = np.array(accuracies)
+
+# Scale the accuracies by 100 to plot as a percentage instead of as a fraction
+plt.plot(ks, accuracies*100)
+plt.xlabel('Value for k (nr. of neighbors)')
+plt.ylabel('Accuracy (%)')
+plt.savefig('figure6.png')
diff --git a/ch02/seeds_knn_sklearn.py b/ch02/seeds_knn_sklearn.py
new file mode 100644
index 00000000..ac89bb59
--- /dev/null
+++ b/ch02/seeds_knn_sklearn.py
@@ -0,0 +1,90 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+# Basic imports
+from __future__ import print_function
+import numpy as np
+from load import load_dataset
+
+
+# Import sklearn implementation of KNN
+from sklearn.neighbors import KNeighborsClassifier
+
+features, labels = load_dataset('seeds')
+classifier = KNeighborsClassifier(n_neighbors=4)
+
+
+n = len(features)
+correct = 0.0
+for ei in range(n):
+ training = np.ones(n, bool)
+ training[ei] = 0
+ testing = ~training
+ classifier.fit(features[training], labels[training])
+ pred = classifier.predict(features[ei])
+ correct += (pred == labels[ei])
+print('Result of leave-one-out: {}'.format(correct/n))
+
+# Import KFold object
+from sklearn.cross_validation import KFold
+
+# means will hold the mean for each fold
+means = []
+
+# kf is a generator of pairs (training,testing) so that each iteration
+# implements a separate fold.
+kf = KFold(len(features), n_folds=3, shuffle=True)
+for training,testing in kf:
+ # We learn a model for this fold with `fit` and then apply it to the
+ # testing data with `predict`:
+ classifier.fit(features[training], labels[training])
+ prediction = classifier.predict(features[testing])
+
+ # np.mean on an array of booleans returns the fraction of correct decisions
+ # for this fold:
+ curmean = np.mean(prediction == labels[testing])
+ means.append(curmean)
+print('Result of cross-validation using KFold: {}'.format(means))
+
+# The function cross_val_score does the same thing as the loop above with a
+# single function call
+
+from sklearn.cross_validation import cross_val_score
+crossed = cross_val_score(classifier, features, labels)
+print('Result of cross-validation using cross_val_score: {}'.format(crossed))
+
+# The results above use the features as is, which we learned was not optimal
+# except if the features happen to all be in the same scale. We can pre-scale
+# the features as explained in the main text:
+
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+classifier = Pipeline([('norm', StandardScaler()), ('knn', classifier)])
+crossed = cross_val_score(classifier, features, labels)
+print('Result with prescaling: {}'.format(crossed))
+
+
+# Now, generate & print a cross-validated confusion matrix for the same result
+from sklearn.metrics import confusion_matrix
+names = list(set(labels))
+labels = np.array([names.index(ell) for ell in labels])
+preds = labels.copy()
+preds[:] = -1
+for train, test in kf:
+ classifier.fit(features[train], labels[train])
+ preds[test] = classifier.predict(features[test])
+
+cmat = confusion_matrix(labels, preds)
+print()
+print('Confusion matrix: [rows represent true outcome, columns predicted outcome]')
+print(cmat)
+
+# The explicit float() conversion is necessary in Python 2
+# (otherwise, integer division would truncate the result to 0)
+acc = cmat.trace()/float(cmat.sum())
+print('Accuracy: {0:.1%}'.format(acc))
+
diff --git a/ch02/seeds_threshold.py b/ch02/seeds_threshold.py
index 890b96d8..6b1f87d0 100644
--- a/ch02/seeds_threshold.py
+++ b/ch02/seeds_threshold.py
@@ -7,17 +7,24 @@
from load import load_dataset
import numpy as np
-from threshold import learn_model, apply_model, accuracy
+from threshold import fit_model, accuracy
features, labels = load_dataset('seeds')
-labels = labels == 'Canadian'
+
+# Turn the labels into a binary array
+labels = (labels == 'Canadian')
error = 0.0
for fold in range(10):
training = np.ones(len(features), bool)
+
+    # numpy slicing sets every 10th element, starting at fold, to 0 (testing)
training[fold::10] = 0
+
+ # whatever is not training is for testing
testing = ~training
- model = learn_model(features[training], labels[training])
+
+ model = fit_model(features[training], labels[training])
test_error = accuracy(features[testing], labels[testing], model)
error += test_error
diff --git a/ch02/simple_threshold.py b/ch02/simple_threshold.py
index 3f432512..d174f283 100644
--- a/ch02/simple_threshold.py
+++ b/ch02/simple_threshold.py
@@ -5,7 +5,6 @@
#
# It is made available under the MIT License
-import numpy as np
from sklearn.datasets import load_iris
data = load_iris()
@@ -13,8 +12,14 @@
target = data['target']
target_names = data['target_names']
labels = target_names[target]
-
plength = features[:, 2]
+
+# To use numpy operations to get setosa features,
+# we build a boolean array
is_setosa = (labels == 'setosa')
-print('Maximum of setosa: {0}.'.format(plength[is_setosa].max()))
-print('Minimum of others: {0}.'.format(plength[~is_setosa].min()))
+
+max_setosa = plength[is_setosa].max()
+min_non_setosa = plength[~is_setosa].min()
+
+print('Maximum of setosa: {0}.'.format(max_setosa))
+print('Minimum of others: {0}.'.format(min_non_setosa))
diff --git a/ch02/stump.py b/ch02/stump.py
index 841ce983..0dfaec85 100644
--- a/ch02/stump.py
+++ b/ch02/stump.py
@@ -5,29 +5,51 @@
#
# It is made available under the MIT License
-from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
data = load_iris()
-features = data['data']
-labels = data['target_names'][data['target']]
+features = data.data
+labels = data.target_names[data.target]
-setosa = (labels == 'setosa')
-features = features[~setosa]
-labels = labels[~setosa]
-virginica = (labels == 'virginica')
+is_setosa = (labels == 'setosa')
+features = features[~is_setosa]
+labels = labels[~is_setosa]
+is_virginica = (labels == 'virginica')
+# Initialize to a value that is worse than any possible test
best_acc = -1.0
+
+# Loop over all the features
for fi in range(features.shape[1]):
+ # Test every possible threshold value for feature fi
thresh = features[:, fi].copy()
+
+ # Test them in order
thresh.sort()
for t in thresh:
+
+ # Generate predictions using t as a threshold
pred = (features[:, fi] > t)
- acc = (pred == virginica).mean()
+
+ # Accuracy is the fraction of predictions that match reality
+ acc = (pred == is_virginica).mean()
+
+ # We test whether negating the test is a better threshold:
+ acc_neg = ((~pred) == is_virginica).mean()
+ if acc_neg > acc:
+ acc = acc_neg
+ negated = True
+ else:
+ negated = False
+
+ # If this is better than previous best, then this is now the new best:
+
if acc > best_acc:
best_acc = acc
best_fi = fi
best_t = t
-print('Best cut is {0} on feature {1}, which achieves accuracy of {2:.1%}.'.format(
- best_t, best_fi, best_acc))
+ best_is_negated = negated
+
+print('Best threshold is {0} on feature {1} (index {2}), which achieves accuracy of {3:.1%}.'.format(
+ best_t, data.feature_names[best_fi], best_fi, best_acc))
diff --git a/ch02/threshold.py b/ch02/threshold.py
index c43fead2..d621a350 100644
--- a/ch02/threshold.py
+++ b/ch02/threshold.py
@@ -8,26 +8,48 @@
import numpy as np
-def learn_model(features, labels):
+# This function was called ``learn_model`` in the first edition
+def fit_model(features, labels):
+ '''Learn a simple threshold model'''
best_acc = -1.0
+ # Loop over all the features:
for fi in range(features.shape[1]):
thresh = features[:, fi].copy()
+ # test all feature values in order:
thresh.sort()
for t in thresh:
pred = (features[:, fi] > t)
+
+ # Measure the accuracy of this
acc = (pred == labels).mean()
+
+ rev_acc = (pred == ~labels).mean()
+ if rev_acc > acc:
+ acc = rev_acc
+ reverse = True
+ else:
+ reverse = False
if acc > best_acc:
best_acc = acc
best_fi = fi
best_t = t
- return best_t, best_fi
+ best_reverse = reverse
+    # A model is a threshold, a feature index, and a reverse flag
+ return best_t, best_fi, best_reverse
-def apply_model(features, model):
- t, fi = model
- return features[:, fi] > t
+# This function was called ``apply_model`` in the first edition
+def predict(model, features):
+ '''Apply a learned model'''
+    # A model is a triple, as returned by fit_model
+ t, fi, reverse = model
+ if reverse:
+ return features[:, fi] <= t
+ else:
+ return features[:, fi] > t
def accuracy(features, labels, model):
- preds = apply_model(features, model)
+ '''Compute the accuracy of the model'''
+ preds = predict(model, features)
return np.mean(preds == labels)
diff --git a/ch03/README.md b/ch03/README.md
new file mode 100644
index 00000000..b74f776b
--- /dev/null
+++ b/ch03/README.md
@@ -0,0 +1,7 @@
+Chapter 3 - Clustering - Finding Related Posts
+==============================================
+
+For this chapter you will need the '20news' dataset from
+http://mlcomp.org/datasets/379. To get the data you will need to
+register, but registration is free. Once logged in, you will
+see a ZIP download link.
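+
+Alternatively, the updated scripts in this chapter (see `rel_post_20news.py`)
+rely on scikit-learn's built-in downloader, so no manual download is needed.
+A minimal sketch of that route:
+
+```python
+import sklearn.datasets
+
+# Downloads the 20 Newsgroups data on first use and caches it locally
+train_data = sklearn.datasets.fetch_20newsgroups(subset="train")
+print("Number of training posts: %i" % len(train_data.filenames))
+```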
diff --git a/ch03/data/toy/01.txt b/ch03/data/toy/01.txt
new file mode 100644
index 00000000..97ebb966
--- /dev/null
+++ b/ch03/data/toy/01.txt
@@ -0,0 +1 @@
+This is a toy post about machine learning. Actually, it contains not much interesting stuff.
\ No newline at end of file
diff --git a/ch03/data/toy/02.txt b/ch03/data/toy/02.txt
new file mode 100644
index 00000000..f7c82b00
--- /dev/null
+++ b/ch03/data/toy/02.txt
@@ -0,0 +1 @@
+Imaging databases provide storage capabilities.
\ No newline at end of file
diff --git a/ch03/data/toy/03.txt b/ch03/data/toy/03.txt
new file mode 100644
index 00000000..c5e03e90
--- /dev/null
+++ b/ch03/data/toy/03.txt
@@ -0,0 +1 @@
+Most imaging databases save images permanently.
diff --git a/ch03/data/toy/04.txt b/ch03/data/toy/04.txt
new file mode 100644
index 00000000..ebd172f5
--- /dev/null
+++ b/ch03/data/toy/04.txt
@@ -0,0 +1 @@
+Imaging databases store data.
\ No newline at end of file
diff --git a/ch03/data/toy/05.txt b/ch03/data/toy/05.txt
new file mode 100644
index 00000000..f9a26973
--- /dev/null
+++ b/ch03/data/toy/05.txt
@@ -0,0 +1 @@
+Imaging databases store data. Imaging databases store data. Imaging databases store data.
\ No newline at end of file
diff --git a/ch03/noise_analysis.py b/ch03/noise_analysis.py
new file mode 100644
index 00000000..11ca13f4
--- /dev/null
+++ b/ch03/noise_analysis.py
@@ -0,0 +1,69 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import sklearn.datasets
+
+groups = [
+ 'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware',
+ 'comp.sys.mac.hardware', 'comp.windows.x', 'sci.space']
+train_data = sklearn.datasets.fetch_20newsgroups(subset="train",
+ categories=groups)
+
+labels = train_data.target
+num_clusters = 50  # rather than the true number of groups, sp.unique(labels).shape[0]
+
+import nltk.stem
+english_stemmer = nltk.stem.SnowballStemmer('english')
+
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+class StemmedTfidfVectorizer(TfidfVectorizer):
+
+ def build_analyzer(self):
+ analyzer = super(TfidfVectorizer, self).build_analyzer()
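+        # The standard analyzer handles tokenization, lowercasing and
+        # stop-word removal; we wrap it so that every token is also stemmed,
+        # which is why the vocabulary below contains terms such as 'sorri'.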
+ return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))
+
+vectorizer = StemmedTfidfVectorizer(min_df=10, max_df=0.5,
+ stop_words='english', decode_error='ignore'
+ )
+vectorized = vectorizer.fit_transform(train_data.data)
+
+post_group = zip(train_data.data, train_data.target)
+# Create a list of tuples that can be sorted by
+# the length of the posts
+all = [(len(post[0]), post[0], train_data.target_names[post[1]])
+ for post in post_group]
+graphics = sorted([post for post in all if post[2] == 'comp.graphics'])
+print(graphics[5])
+# (245, 'From: SITUNAYA@IBM3090.BHAM.AC.UK\nSubject: test....(sorry)\nOrganization:
+# The University of Birmingham, United Kingdom\nLines: 1\nNNTP-Posting-Host: ibm3090.bham.ac.uk
+# \n\n==============================================================================\n',
+# 'comp.graphics')
+
+noise_post = graphics[5][1]
+
+analyzer = vectorizer.build_analyzer()
+print(list(analyzer(noise_post)))
+
+useful = set(analyzer(noise_post)).intersection(vectorizer.get_feature_names())
+print(sorted(useful))
+# ['ac', 'birmingham', 'host', 'kingdom', 'nntp', 'sorri', 'test', 'uk', 'unit', 'univers']
+
+for term in sorted(useful):
+ print('IDF(%s)=%.2f' % (term,
+ vectorizer._tfidf.idf_[vectorizer.vocabulary_[term]]))
+# IDF(ac)=3.51
+# IDF(birmingham)=6.77
+# IDF(host)=1.74
+# IDF(kingdom)=6.68
+# IDF(nntp)=1.77
+# IDF(sorri)=4.14
+# IDF(test)=3.83
+# IDF(uk)=3.70
+# IDF(unit)=4.42
+# IDF(univers)=1.91
diff --git a/ch03/plot_kmeans_example.py b/ch03/plot_kmeans_example.py
index aeec9c36..43b9b71b 100644
--- a/ch03/plot_kmeans_example.py
+++ b/ch03/plot_kmeans_example.py
@@ -15,6 +15,8 @@
from matplotlib import pylab
from sklearn.cluster import KMeans
+from utils import CHART_DIR
+
seed = 2
sp.random.seed(seed) # to reproduce the data later on
@@ -31,7 +33,6 @@ def plot_clustering(x, y, title, mx=None, ymax=None, xmin=None, km=None):
pylab.title(title)
pylab.xlabel("Occurrence word 1")
pylab.ylabel("Occurrence word 2")
- # pylab.xticks([w*7*24 for w in range(10)], ['week %i'%w for w in range(10)])
pylab.autoscale(tight=True)
pylab.ylim(ymin=0, ymax=1)
@@ -55,7 +56,7 @@ def plot_clustering(x, y, title, mx=None, ymax=None, xmin=None, km=None):
i = 1
plot_clustering(x, y, "Vectors")
-pylab.savefig(os.path.join("..", "1400_03_0%i.png" % i))
+pylab.savefig(os.path.join(CHART_DIR, "1400_03_0%i.png" % i))
pylab.clf()
i += 1
@@ -80,7 +81,7 @@ def plot_clustering(x, y, title, mx=None, ymax=None, xmin=None, km=None):
c1a, c1b, c1c = km.cluster_centers_
pylab.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
marker='x', linewidth=2, s=100, color='black')
-pylab.savefig(os.path.join("..", "1400_03_0%i.png" % i))
+pylab.savefig(os.path.join(CHART_DIR, "1400_03_0%i.png" % i))
pylab.clf()
i += 1
@@ -110,7 +111,7 @@ def plot_clustering(x, y, title, mx=None, ymax=None, xmin=None, km=None):
pylab.gca().add_patch(
pylab.Arrow(c1c[0], c1c[1], c2c[0] - c1c[0], c2c[1] - c1c[1], width=0.1))
-pylab.savefig(os.path.join("..", "1400_03_0%i.png" % i))
+pylab.savefig(os.path.join(CHART_DIR, "1400_03_0%i.png" % i))
pylab.clf()
i += 1
@@ -131,7 +132,7 @@ def plot_clustering(x, y, title, mx=None, ymax=None, xmin=None, km=None):
pylab.scatter(km.cluster_centers_[:, 0], km.cluster_centers_[:, 1],
marker='x', linewidth=2, s=100, color='black')
-pylab.savefig(os.path.join("..", "1400_03_0%i.png" % i))
+pylab.savefig(os.path.join(CHART_DIR, "1400_03_0%i.png" % i))
pylab.clf()
i += 1
diff --git a/ch03/rel_post_01.py b/ch03/rel_post_01.py
index b85bf593..9c516180 100644
--- a/ch03/rel_post_01.py
+++ b/ch03/rel_post_01.py
@@ -12,8 +12,10 @@
from sklearn.feature_extraction.text import CountVectorizer
-DIR = r"../data/toy"
-posts = [open(os.path.join(DIR, f)).read() for f in os.listdir(DIR)]
+from utils import DATA_DIR
+
+TOY_DIR = os.path.join(DATA_DIR, "toy")
+posts = [open(os.path.join(TOY_DIR, f)).read() for f in os.listdir(TOY_DIR)]
new_post = "imaging databases"
@@ -41,8 +43,7 @@ def build_analyzer(self):
return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))
vectorizer = StemmedTfidfVectorizer(
- min_df=1, stop_words='english', charset_error='ignore')
-print(vectorizer)
+ min_df=1, stop_words='english', decode_error='ignore')
X_train = vectorizer.fit_transform(posts)
diff --git a/ch03/rel_post_mlcomp_01.py b/ch03/rel_post_20news.py
similarity index 55%
rename from ch03/rel_post_mlcomp_01.py
rename to ch03/rel_post_20news.py
index 3b11169a..6dad9431 100644
--- a/ch03/rel_post_mlcomp_01.py
+++ b/ch03/rel_post_20news.py
@@ -15,16 +15,28 @@
Any ideas? Thanks.
"""
-MLCOMP_DIR = r"P:\Dropbox\pymlbook\data"
+print("""\
+Dear reader of the 1st edition of 'Building Machine Learning Systems with Python'!
+For the 2nd edition we introduced a couple of changes that will lead to
+results differing from those in the 1st edition.
+E.g. we now fully rely on scikit's fetch_20newsgroups() instead of requiring
+you to download the data manually from MLCOMP.
+If you have any questions, please ask at http://www.twotoreal.com
+""")
+
+all_data = sklearn.datasets.fetch_20newsgroups(subset="all")
+print("Number of total posts: %i" % len(all_data.filenames))
+# Number of total posts: 18846
+
groups = [
'comp.graphics', 'comp.os.ms-windows.misc', 'comp.sys.ibm.pc.hardware',
- 'comp.sys.ma c.hardware', 'comp.windows.x', 'sci.space']
-dataset = sklearn.datasets.load_mlcomp("20news-18828", "train",
- mlcomp_root=MLCOMP_DIR,
- categories=groups)
-print("Number of posts:", len(dataset.filenames))
+ 'comp.sys.mac.hardware', 'comp.windows.x', 'sci.space']
+train_data = sklearn.datasets.fetch_20newsgroups(subset="train",
+ categories=groups)
+print("Number of training posts in tech groups:", len(train_data.filenames))
+# Number of training posts in tech groups: 3529
-labels = dataset.target
+labels = train_data.target
num_clusters = 50 # sp.unique(labels).shape[0]
import nltk.stem
@@ -40,31 +52,41 @@ def build_analyzer(self):
return lambda doc: (english_stemmer.stem(w) for w in analyzer(doc))
vectorizer = StemmedTfidfVectorizer(min_df=10, max_df=0.5,
- # max_features=1000,
- stop_words='english', charset_error='ignore'
+ stop_words='english', decode_error='ignore'
)
-vectorized = vectorizer.fit_transform(dataset.data)
+
+vectorized = vectorizer.fit_transform(train_data.data)
num_samples, num_features = vectorized.shape
print("#samples: %d, #features: %d" % (num_samples, num_features))
-
+# #samples: 3529, #features: 4712
from sklearn.cluster import KMeans
-km = KMeans(n_clusters=num_clusters, init='k-means++', n_init=1,
- verbose=1)
-
+km = KMeans(n_clusters=num_clusters, n_init=1, verbose=1, random_state=3)
clustered = km.fit(vectorized)
+print("km.labels_=%s" % km.labels_)
+# km.labels_=[ 6 34 22 ..., 2 21 26]
+
+print("km.labels_.shape=%s" % km.labels_.shape)
+# km.labels_.shape=3529
+
from sklearn import metrics
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
+# Homogeneity: 0.400
print("Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_))
+# Completeness: 0.206
print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
+# V-measure: 0.272
print("Adjusted Rand Index: %0.3f" %
metrics.adjusted_rand_score(labels, km.labels_))
+# Adjusted Rand Index: 0.064
print("Adjusted Mutual Information: %0.3f" %
metrics.adjusted_mutual_info_score(labels, km.labels_))
+# Adjusted Mutual Information: 0.197
print(("Silhouette Coefficient: %0.3f" %
metrics.silhouette_score(vectorized, labels, sample_size=1000)))
+# Silhouette Coefficient: 0.006
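+
+# For reference: homogeneity is perfect when each cluster contains only
+# members of a single class, completeness is perfect when all members of a
+# class end up in the same cluster, and the V-measure is the harmonic mean
+# of the two.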
new_post_vec = vectorizer.transform([new_post])
new_post_label = km.predict(new_post_vec)[0]
@@ -74,14 +96,22 @@ def build_analyzer(self):
similar = []
for i in similar_indices:
dist = sp.linalg.norm((new_post_vec - vectorized[i]).toarray())
- similar.append((dist, dataset.data[i]))
+ similar.append((dist, train_data.data[i]))
similar = sorted(similar)
+print("Count similar: %i" % len(similar))
show_at_1 = similar[0]
-show_at_2 = similar[len(similar) / 2]
-show_at_3 = similar[-1]
+show_at_2 = similar[int(len(similar) / 10)]
+show_at_3 = similar[int(len(similar) / 2)]
+print("=== #1 ===")
print(show_at_1)
+print()
+
+print("=== #2 ===")
print(show_at_2)
+print()
+
+print("=== #3 ===")
print(show_at_3)
diff --git a/ch03/utils.py b/ch03/utils.py
new file mode 100644
index 00000000..29d46836
--- /dev/null
+++ b/ch03/utils.py
@@ -0,0 +1,22 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import os
+import sys
+
+DATA_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "data")
+
+if not os.path.exists(DATA_DIR):
+ print("Uh, we were expecting a data directory, which contains the toy data")
+ sys.exit(1)
+
+CHART_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "charts")
+if not os.path.exists(CHART_DIR):
+ os.mkdir(CHART_DIR)
+
diff --git a/ch04/.gitignore b/ch04/.gitignore
new file mode 100644
index 00000000..c4c0b18a
--- /dev/null
+++ b/ch04/.gitignore
@@ -0,0 +1,6 @@
+wiki_lda.pkl
+wiki_lda.pkl.state
+*.png
+*.npy
+*.pkl
+topics.txt
diff --git a/ch04/README.rst b/ch04/README.rst
new file mode 100644
index 00000000..99a3c186
--- /dev/null
+++ b/ch04/README.rst
@@ -0,0 +1,65 @@
+=========
+Chapter 4
+=========
+
+Support code for *Chapter 4: Topic Modeling*
+
+
+AP Data
+-------
+
+To download the AP data, use the ``download_ap.sh`` script inside the ``data``
+directory::
+
+ cd data
+ ./download_ap.sh
+
+Word cloud creation
+-------------------
+
+Word cloud creation requires that ``pytagcloud`` be installed (in turn, this
+requires ``pygame``). Since this is not an essential part of the chapter, the
+code will work even if you have not installed it (naturally, the cloud image
+will not be generated and a warning will be printed).
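+
+If you do want the cloud images, ``pytagcloud`` can usually be installed from
+PyPI (assuming a working ``pygame``)::
+
+    pip install pytagcloud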
+
+
+Wikipedia processing
+--------------------
+
+You will need **a lot of disk space**. The download of the Wikipedia text is
+11GB and preprocessing it into the intermediate format that gensim uses takes
+another 24GB, i.e., roughly 35GB in total!
+
+Run the following two commands inside the ``data/`` directory::
+
+ ./download_wp.sh
+ ./preprocess-wikidata.sh
+
+As the filenames indicate, the first step will download the data and the second
+one will preprocess it. Preprocessing can take several hours, but it is
+feasible to run it on a modern laptop. Once the second step is finished, you
+may remove the input file if you want to save disk space
+(``data/enwiki-latest-pages-articles.xml.bz2``).
+
+To generate the model, you can run the ``wikitopics_create.py`` script, while
+the ``wikitopics_plot.py`` script will plot the most heavily discussed topic as
+well as the least heavily discussed one. The code is split into two steps
+because the first one can take a very long time; the creation script saves its
+results to disk so that you can explore them later at your leisure.
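+
+For example (run from this directory, after the preprocessing above)::
+
+    python wikitopics_create.py
+    python wikitopics_plot.py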
+
+You should not expect that your results will exactly match the results in the
+book, for two reasons:
+
+1. The LDA algorithm is a probabilistic algorithm and can give different
+ results every time it is run.
+2. Wikipedia keeps changing. Thus, even your input data will be different.
+
+Scripts
+-------
+
+blei_lda.py
+    Computes LDA using the AP corpus.
+wikitopics_create.py
+    Creates the topic model for Wikipedia using LDA (the Wikipedia dump must be downloaded first).
+wikitopics_create_hdp.py
+    Creates the topic model for Wikipedia using HDP (the Wikipedia dump must be downloaded first).
+wikitopics_plot.py
+    Plots the most and the least heavily discussed topics from the saved model.
diff --git a/ch04/blei_lda.py b/ch04/blei_lda.py
index 61362e3e..7f6ac2b3 100644
--- a/ch04/blei_lda.py
+++ b/ch04/blei_lda.py
@@ -6,44 +6,81 @@
# It is made available under the MIT License
from __future__ import print_function
-from gensim import corpora, models, similarities
-from mpltools import style
+from wordcloud import create_cloud
+try:
+ from gensim import corpora, models, matutils
+except ImportError:
+ print("import gensim failed.")
+ print()
+ print("Please install it")
+ raise
+
import matplotlib.pyplot as plt
import numpy as np
from os import path
-style.use('ggplot')
+NUM_TOPICS = 100
+
+# Check that data exists
if not path.exists('./data/ap/ap.dat'):
print('Error: Expected data to be present at data/ap/')
+    print('Please cd into ./data & run ./download_ap.sh')
+    raise SystemExit('missing input data')
+# Load the data
corpus = corpora.BleiCorpus('./data/ap/ap.dat', './data/ap/vocab.txt')
+
+# Build the topic model
model = models.ldamodel.LdaModel(
- corpus, num_topics=100, id2word=corpus.id2word, alpha=None)
+ corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=None)
-for ti in xrange(84):
+# Iterate over all the topics in the model, appending each one to topics.txt
+open('topics.txt', 'w').close()  # truncate the output file first
+for ti in range(model.num_topics):
words = model.show_topic(ti, 64)
- tf = sum(f for f, w in words)
- print('\n'.join('{}:{}'.format(w, int(1000. * f / tf)) for f, w in words))
- print()
- print()
- print()
+ tf = sum(f for _, f in words)
+    # append here; opening with 'w' inside the loop would overwrite all
+    # previously written topics on every iteration
+    with open('topics.txt', 'a') as output:
+ output.write('\n'.join('{}:{}'.format(w, int(1000. * f / tf)) for w, f in words))
+ output.write("\n\n\n")
+
+# We first identify the most discussed topic, i.e., the one with the
+# highest total weight
+
+topics = matutils.corpus2dense(model[corpus], num_terms=model.num_topics)
+weight = topics.sum(1)
+max_topic = weight.argmax()
-thetas = [model[c] for c in corpus]
-plt.hist([len(t) for t in thetas], np.arange(42))
-plt.ylabel('Nr of documents')
-plt.xlabel('Nr of topics')
-plt.savefig('../1400OS_04_01+.png')
+
+# Get the top 64 words for this topic
+# Without the argument, show_topic would return only 10 words
+words = model.show_topic(max_topic, 64)
+
+# This function will actually check for the presence of pytagcloud and is otherwise a no-op
+create_cloud('cloud_blei_lda.png', words)
+
+num_topics_used = [len(model[doc]) for doc in corpus]
+fig,ax = plt.subplots()
+ax.hist(num_topics_used, np.arange(42))
+ax.set_ylabel('Nr of documents')
+ax.set_xlabel('Nr of topics')
+fig.tight_layout()
+fig.savefig('Figure_04_01.png')
+
+
+# Now, repeat the same exercise using alpha=1.0
+# You can edit the constant below to play around with this parameter
+ALPHA = 1.0
model1 = models.ldamodel.LdaModel(
- corpus, num_topics=100, id2word=corpus.id2word, alpha=1.)
-thetas1 = [model1[c] for c in corpus]
-
-#model8 = models.ldamodel.LdaModel(corpus, num_topics=100, id2word=corpus.id2word, alpha=1.e-8)
-#thetas8 = [model8[c] for c in corpus]
-plt.clf()
-plt.hist([[len(t) for t in thetas], [len(t) for t in thetas1]], np.arange(42))
-plt.ylabel('Nr of documents')
-plt.xlabel('Nr of topics')
-plt.text(9, 223, r'default alpha')
-plt.text(26, 156, 'alpha=1.0')
-plt.savefig('../1400OS_04_02+.png')
+ corpus, num_topics=NUM_TOPICS, id2word=corpus.id2word, alpha=ALPHA)
+num_topics_used1 = [len(model1[doc]) for doc in corpus]
+
+fig,ax = plt.subplots()
+ax.hist([num_topics_used, num_topics_used1], np.arange(42))
+ax.set_ylabel('Nr of documents')
+ax.set_xlabel('Nr of topics')
+
+# The coordinates below were fit by trial and error to look good
+ax.text(9, 223, r'default alpha')
+ax.text(26, 156, 'alpha=1.0')
+fig.tight_layout()
+fig.savefig('Figure_04_02.png')
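+
+# Background note (not from the original script): when alpha is left at
+# gensim's default, it is symmetric with value 1/num_topics, i.e. much
+# smaller than 1.0. A larger alpha makes documents spread their weight over
+# more topics, which is why the alpha=1.0 histogram is shifted to the right.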
+
diff --git a/ch04/build_lda.py b/ch04/build_lda.py
index 22a029f4..a0ee9c5f 100644
--- a/ch04/build_lda.py
+++ b/ch04/build_lda.py
@@ -4,12 +4,17 @@
# published by PACKT Publishing
#
# It is made available under the MIT License
+from __future__ import print_function
-import nltk.corpus
-import milk
+try:
+ import nltk.corpus
+except ImportError:
+ print("nltk not found")
+ print("please install it")
+ raise
+from scipy.spatial import distance
import numpy as np
-import string
-from gensim import corpora, models, similarities
+from gensim import corpora, models
import sklearn.datasets
import nltk.stem
from collections import defaultdict
@@ -26,8 +31,17 @@ def get_texts(self):
def __len__(self):
return len(self.input)
-dataset = sklearn.datasets.load_mlcomp("20news-18828", "train",
- mlcomp_root='../data')
+try:
+ dataset = sklearn.datasets.load_mlcomp("20news-18828", "train",
+ mlcomp_root='./data')
+except Exception:
+ print("Newsgroup data not found.")
+ print("Please download from http://mlcomp.org/datasets/379")
+ print("And expand the zip into the subdirectory data/")
+ print()
+ print()
+ raise
+
otexts = dataset.data
texts = dataset.data
@@ -62,13 +76,14 @@ def __len__(self):
for i, c in enumerate(corpus):
for ti, v in model[c]:
thetas[i, ti] += v
-distances = milk.unsupervised.pdist(thetas)
+
+distances = distance.squareform(distance.pdist(thetas))
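+# pdist returns the pairwise distances in condensed form (one entry per
+# pair); squareform expands them into a full square matrix so that
+# distances[i] can be indexed directly below.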
large = distances.max() + 1
-for i in xrange(len(distances)):
+for i in range(len(distances)):
distances[i, i] = large
-print otexts[1]
-print
-print
-print
-print otexts[distances[1].argmin()]
+print(otexts[1])
+print()
+print()
+print()
+print(otexts[distances[1].argmin()])
diff --git a/ch04/data/.gitignore b/ch04/data/.gitignore
index 8eb48cbf..91f24bbc 100644
--- a/ch04/data/.gitignore
+++ b/ch04/data/.gitignore
@@ -1,8 +1,12 @@
ap.tgz
ap/
+dataset-379-20news-18828_HJRZF.zip
+379/
enwiki-latest-pages-articles.xml.bz2
wiki_en_output_bow.mm
+wiki_en_output_bow.mm.gz
wiki_en_output_bow.mm.index
wiki_en_output_tfidf.mm
+wiki_en_output_tfidf.mm.gz
wiki_en_output_tfidf.mm.index
-wiki_en_output_wordids.txt
+wiki_en_output_wordids.txt.bz2
diff --git a/ch04/data/download_ap.sh b/ch04/data/download_ap.sh
new file mode 100755
index 00000000..da27814a
--- /dev/null
+++ b/ch04/data/download_ap.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+wget http://www.cs.columbia.edu/~blei/lda-c/ap.tgz
+tar xzf ap.tgz
diff --git a/ch04/data/download.sh b/ch04/data/download_wp.sh
similarity index 58%
rename from ch04/data/download.sh
rename to ch04/data/download_wp.sh
index 502a0620..243ff03c 100755
--- a/ch04/data/download.sh
+++ b/ch04/data/download_wp.sh
@@ -1,4 +1,2 @@
#!/bin/sh
wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
-wget http://www.cs.princeton.edu/~blei/lda-c/ap.tgz
-tar xzf ap.tgz
diff --git a/ch04/data/preprocess-wikidata.sh b/ch04/data/preprocess-wikidata.sh
new file mode 100755
index 00000000..d9a6ebb3
--- /dev/null
+++ b/ch04/data/preprocess-wikidata.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+python -m gensim.scripts.make_wiki enwiki-latest-pages-articles.xml.bz2 wiki_en_output
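+
+# This writes wiki_en_output_bow.mm, wiki_en_output_tfidf.mm (plus their
+# .index files) and wiki_en_output_wordids.txt.bz2, which the wikitopics_*
+# scripts expect to find under data/.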
diff --git a/ch04/wikitopics.py b/ch04/wikitopics.py
deleted file mode 100644
index f04698dc..00000000
--- a/ch04/wikitopics.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from __future__ import print_function
-import numpy as np
-import logging
-import gensim
-logging.basicConfig(
- format='%(asctime)s : %(levelname)s : %(message)s',
- level=logging.INFO)
-id2word = gensim.corpora.Dictionary.load_from_text(
- 'data/wiki_en_output_wordids.txt')
-mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
-model = gensim.models.ldamodel.LdaModel(
- corpus=mm,
- id2word=id2word,
- num_topics=100,
- update_every=1,
- chunksize=10000,
- passes=1)
-model.save('wiki_lda.pkl')
-topics = [model[doc] for doc in mm]
-lens = np.array([len(t) for t in topics])
-print(np.mean(lens <= 10))
-print(np.mean(lens))
-
-counts = np.zeros(100)
-for doc_top in topics:
- for ti, _ in doc_toc:
- counts[ti] += 1
-
-for doc_top in topics:
- for ti, _ in doc_top:
- counts[ti] += 1
-
-words = model.show_topic(counts.argmax(), 64)
-print(words)
-print()
-print()
-print()
-words = model.show_topic(counts.argmin(), 64)
-print(words)
-print()
-print()
-print()
diff --git a/ch04/wikitopics_create.py b/ch04/wikitopics_create.py
new file mode 100644
index 00000000..d1aff278
--- /dev/null
+++ b/ch04/wikitopics_create.py
@@ -0,0 +1,51 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from __future__ import print_function
+import logging
+import gensim
+import numpy as np
+
+NR_OF_TOPICS = 100
+
+# Set up logging in order to get progress information as the model is being built:
+logging.basicConfig(
+ format='%(asctime)s : %(levelname)s : %(message)s',
+ level=logging.INFO)
+
+# Load the preprocessed corpus (id2word & mm):
+id2word = gensim.corpora.Dictionary.load_from_text(
+ 'data/wiki_en_output_wordids.txt.bz2')
+mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
+
+# Calling the constructor is enough to build the model
+# This call will take a few hours!
+model = gensim.models.ldamodel.LdaModel(
+ corpus=mm,
+ id2word=id2word,
+ num_topics=NR_OF_TOPICS,
+ update_every=1,
+ chunksize=10000,
+ passes=1)
+
+# Save the model so we do not need to learn it again.
+model.save('wiki_lda.pkl')
+
+# Compute the document/topic matrix
+topics = np.zeros((len(mm), model.num_topics))
+for di,doc in enumerate(mm):
+ doc_top = model[doc]
+ for ti,tv in doc_top:
+ topics[di,ti] += tv
+np.save('topics.npy', topics)
+
+# Alternatively, we create a sparse matrix and save that. This alternative
+# saves disk space, at the cost of slightly more complex code:
+
+## from scipy import sparse, io
+## sp = sparse.csr_matrix(topics)
+## io.savemat('topics.mat', {'topics': sp})
diff --git a/ch04/wikitopics_create_hdp.py b/ch04/wikitopics_create_hdp.py
new file mode 100644
index 00000000..951d1850
--- /dev/null
+++ b/ch04/wikitopics_create_hdp.py
@@ -0,0 +1,39 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from __future__ import print_function
+import logging
+import gensim
+import numpy as np
+
+# Set up logging in order to get progress information as the model is being built:
+logging.basicConfig(
+ format='%(asctime)s : %(levelname)s : %(message)s',
+ level=logging.INFO)
+
+# Load the preprocessed corpus (id2word & mm):
+id2word = gensim.corpora.Dictionary.load_from_text(
+ 'data/wiki_en_output_wordids.txt.bz2')
+mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
+
+# Calling the constructor is enough to build the model
+# This call will take a few hours!
+model = gensim.models.hdpmodel.HdpModel(
+ corpus=mm,
+ id2word=id2word,
+ chunksize=10000)
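+
+# Note that, unlike the LDA script (wikitopics_create.py), no number of
+# topics is specified here: the hierarchical Dirichlet process infers the
+# number of topics from the data.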
+
+# Save the model so we do not need to learn it again.
+model.save('wiki_hdp.pkl')
+
+# Compute the document/topic matrix
+topics = np.zeros((len(mm), model.num_topics))
+for di,doc in enumerate(mm):
+ doc_top = model[doc]
+ for ti,tv in doc_top:
+ topics[di,ti] += tv
+np.save('topics_hdp.npy', topics)
diff --git a/ch04/wikitopics_plot.py b/ch04/wikitopics_plot.py
new file mode 100644
index 00000000..04adf780
--- /dev/null
+++ b/ch04/wikitopics_plot.py
@@ -0,0 +1,64 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from __future__ import print_function
+import numpy as np
+import gensim
+from os import path
+from wordcloud import create_cloud
+
+if not path.exists('wiki_lda.pkl'):
+ import sys
+ sys.stderr.write('''\
+This script must be run after wikitopics_create.py!
+
+That script creates and saves the LDA model (this must only be done once).
+This script is responsible for the analysis.''')
+ sys.exit(1)
+
+# Load the preprocessed Wikipedia corpus (id2word and mm)
+id2word = gensim.corpora.Dictionary.load_from_text(
+ 'data/wiki_en_output_wordids.txt.bz2')
+mm = gensim.corpora.MmCorpus('data/wiki_en_output_tfidf.mm')
+
+# Load the precomputed model
+model = gensim.models.ldamodel.LdaModel.load('wiki_lda.pkl')
+
+topics = np.load('topics.npy', mmap_mode='r')
+
+# Compute the number of topics mentioned in each document
+lens = (topics > 0).sum(axis=1)
+print('Mean number of topics mentioned: {0:.3}'.format(np.mean(lens)))
+print('Percentage of articles mentioning less than 10 topics: {0:.1%}'.format(np.mean(lens <= 10)))
+
+# Weights will be the total weight of each topic
+weights = topics.sum(0)
+
+# Retrieve the most heavily used topic and plot it as a word cloud:
+words = model.show_topic(weights.argmax(), 64)
+
+# The parameter ``maxsize`` often needs some manual tuning to make it look nice.
+create_cloud('Wikipedia_most.png', words, maxsize=250, fontname='Cardo')
+
+fraction_mention = np.mean(topics[:,weights.argmax()] > 0)
+print("The most mentioned topics is mentioned in {:.1%} of documents.".format(fraction_mention))
+total_weight = np.mean(topics[:,weights.argmax()])
+print("It represents {:.1%} of the total number of words.".format(total_weight))
+print()
+print()
+print()
+
+# Retrieve the **least** heavily used topic and plot it as a word cloud:
+words = model.show_topic(weights.argmin(), 64)
+create_cloud('Wikipedia_least.png', words, maxsize=150, fontname='Cardo')
+fraction_mention = np.mean(topics[:,weights.argmin()] > 0)
+print("The least mentioned topics is mentioned in {:.1%} of documents.".format(fraction_mention))
+total_weight = np.mean(topics[:,weights.argmin()])
+print("It represents {:.1%} of the total number of words.".format(total_weight))
+print()
+print()
+print()
diff --git a/ch04/wordcloud.py b/ch04/wordcloud.py
new file mode 100644
index 00000000..accca2d6
--- /dev/null
+++ b/ch04/wordcloud.py
@@ -0,0 +1,29 @@
+from __future__ import print_function
+warned_of_error = False
+
+def create_cloud(oname, words, maxsize=120, fontname='Lobster'):
+    '''Creates a word cloud (when pytagcloud is installed)
+
+    Parameters
+    ----------
+    oname : output filename
+    words : list of (str, float)
+ maxsize : int, optional
+ Size of maximum word. The best setting for this parameter will often
+ require some manual tuning for each input.
+ fontname : str, optional
+ Font to use.
+ '''
+ try:
+ from pytagcloud import create_tag_image, make_tags
+    except ImportError:
+        global warned_of_error
+        if not warned_of_error:
+            print("Could not import pytagcloud. Skipping cloud generation")
+            warned_of_error = True
+        return
+
+ # gensim returns a weight between 0 and 1 for each word, while pytagcloud
+ # expects an integer word count. So, we multiply by a large number and
+ # round. For a visualization this is an adequate approximation.
+ words = [(w,int(v*10000)) for w,v in words]
+ tags = make_tags(words, maxsize=maxsize)
+ create_tag_image(tags, oname, size=(1800, 1200), fontname=fontname)
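+
+# Hypothetical usage example, with made-up (word, weight) pairs in the
+# format that gensim's show_topic returns:
+# create_cloud('example_cloud.png', [('graphics', 0.09), ('image', 0.05)])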
diff --git a/ch05/README.md b/ch05/README.md
new file mode 100644
index 00000000..39253f22
--- /dev/null
+++ b/ch05/README.md
@@ -0,0 +1,11 @@
+Chapter 5 - Classification - Detecting Poor Answers
+===================================================
+
+For the first edition, the book chapter was based on StackExchange's data dump from August 2012.
+
+After publishing the book, StackExchange released the May 2014 version at
+[https://archive.org/download/stackexchange/stackexchange_archive.torrent](https://archive.org/download/stackexchange/stackexchange_archive.torrent).
+
+Note that if you use the latest version, you will get slightly different results.
+
+The code uses pyenchant for spell correction. Pyenchant is only there to make experimenting with additional features more enjoyable; it is not used later in the chapter. So, if your platform makes installing it too hard (e.g., 64-bit Windows), don't bother.
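+
+A minimal sketch to check that your pyenchant installation works (it uses the
+same `en_US` dictionary as the chapter code):
+
+```python
+import enchant
+
+speller = enchant.Dict("en_US")
+print(speller.check("machine"))  # True for a correctly spelled word
+print(speller.check("machne"))   # False for a misspelling
+```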
diff --git a/ch05/chose_instances.py b/ch05/chose_instances.py
index f1a1e70c..0614cd32 100644
--- a/ch05/chose_instances.py
+++ b/ch05/chose_instances.py
@@ -15,17 +15,28 @@
try:
import enchant
+ speller = enchant.Dict("en_US")
+
except:
- print(
- "Enchant is not installed. You can get it from http://packages.python.org/pyenchant/. Exitting...")
- sys.exit(1)
+ print("""\
+Enchant is not installed, which is not a problem since spell correction features
+will not be used in the chapter. If, however, you want to experiment with them
+(highly encouraged!), you can get the library from http://packages.python.org/pyenchant/.
+""")
+
+ class EnchantMock:
+
+ def check(self, word):
+ return True
+ speller = EnchantMock()
from data import chosen, chosen_meta, filtered, filtered_meta
filtered_meta = json.load(open(filtered_meta, "r"))
-speller = enchant.Dict("en_US")
-
def misspelled_fraction(p):
tokens = p.split()
diff --git a/ch05/classify.py b/ch05/classify.py
index 69832b56..76699819 100644
--- a/ch05/classify.py
+++ b/ch05/classify.py
@@ -16,7 +16,7 @@
from sklearn import neighbors
from data import chosen, chosen_meta
-from utils import plot_roc, plot_pr
+from utils import plot_pr
from utils import plot_feat_importance
from utils import load_meta
from utils import fetch_posts
@@ -30,11 +30,12 @@
import nltk
-# splitting questions into train (70%) and test(30%) and then take their
-# answers
-all_posts = list(meta.keys())
-all_questions = [q for q, v in meta.items() if v['ParentId'] == -1]
-all_answers = [q for q, v in meta.items() if v['ParentId'] != -1] # [:500]
+# The sorting below is only to ensure reproducible numbers. Further down
+# we will occasionally skip a fold when it contains instances of only
+# one label. The two lines below ensure that the behavior is exactly the
+# same for different runs.
+all_questions = sorted([q for q, v in meta.items() if v['ParentId'] == -1])
+all_answers = sorted([q for q, v in meta.items() if v['ParentId'] != -1])
feature_names = np.array((
'NumTextTokens',
@@ -47,20 +48,15 @@
'NumImages'
))
-# activate the following for reduced feature space
-"""
-feature_names = np.array((
- 'NumTextTokens',
- 'LinkCount',
-))
-"""
-
def prepare_sent_features():
for pid, text in fetch_posts(chosen, with_index=True):
if not text:
meta[pid]['AvgSentLen'] = meta[pid]['AvgWordLen'] = 0
else:
+ from platform import python_version
+ if python_version().startswith('2'):
+ text = text.decode('utf-8')
sent_lens = [len(nltk.word_tokenize(
sent)) for sent in nltk.sent_tokenize(text)]
meta[pid]['AvgSentLen'] = np.mean(sent_lens)
@@ -80,17 +76,26 @@ def get_features(aid):
return tuple(meta[aid][fn] for fn in feature_names)
qa_X = np.asarray([get_features(aid) for aid in all_answers])
-# Score > 0 tests => positive class is good answer
-# Score <= 0 tests => positive class is poor answer
-qa_Y = np.asarray([meta[aid]['Score'] > 0 for aid in all_answers])
+
classifying_answer = "good"
+#classifying_answer = "poor"
+
+if classifying_answer == "good":
+ # Score > 0 tests => positive class is good answer
+ qa_Y = np.asarray([meta[aid]['Score'] > 0 for aid in all_answers])
+elif classifying_answer == "poor":
+ # Score <= 0 tests => positive class is poor answer
+ qa_Y = np.asarray([meta[aid]['Score'] <= 0 for aid in all_answers])
+else:
+    raise ValueError("classifying_answer='%s' is not supported" %
+                     classifying_answer)
for idx, feat in enumerate(feature_names):
plot_feat_hist([(qa_X[:, idx], feat)])
-"""
-plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [1,0]], 'feat_hist_two.png')
-plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [3,4,5,6]], 'feat_hist_four.png')
-"""
+
+#plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [1,0]], 'feat_hist_two.png')
+#plot_feat_hist([(qa_X[:, idx], feature_names[idx]) for idx in [3,4,5,6]], 'feat_hist_four.png')
+
avg_scores_summary = []
@@ -115,10 +120,16 @@ def measure(clf_class, parameters, name, data_size=None, plot=False):
pr_scores = []
precisions, recalls, thresholds = [], [], []
- for train, test in cv:
+ for fold_idx, (train, test) in enumerate(cv):
X_train, y_train = X[train], Y[train]
X_test, y_test = X[test], Y[test]
+ only_one_class_in_train = len(set(y_train)) == 1
+ only_one_class_in_test = len(set(y_test)) == 1
+ if only_one_class_in_train or only_one_class_in_test:
+ # this would pose problems later on
+ continue
+
clf = clf_class(**parameters)
clf.fit(X_train, y_train)
@@ -145,12 +156,20 @@ def measure(clf_class, parameters, name, data_size=None, plot=False):
precisions.append(precision)
recalls.append(recall)
thresholds.append(pr_thresholds)
+
+        # This threshold is determined at the end of Chapter 5, where we
+        # look for conditions under which precision is around 80%. With it,
+        # we trade off recall for precision.
+ threshold_for_detecting_good_answers = 0.59
+
+ print("Clone #%i" % fold_idx)
print(classification_report(y_test, proba[:, label_idx] >
- 0.63, target_names=['not accepted', 'accepted']))
+ threshold_for_detecting_good_answers, target_names=['not accepted', 'accepted']))
# get medium clone
scores_to_sort = pr_scores # roc_scores
medium = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
+ print("Medium clone is #%i" % medium)
if plot:
#plot_roc(roc_scores[medium], name, fprs[medium], tprs[medium])
@@ -178,6 +197,7 @@ def measure(clf_class, parameters, name, data_size=None, plot=False):
def bias_variance_analysis(clf_class, parameters, name):
data_sizes = np.arange(60, 2000, 4)
train_errors = []
@@ -208,16 +228,16 @@ def k_complexity_analysis(clf_class, parameters):
plot_k_complexity(ks, train_errors, test_errors)
-for k in [5]: # [5, 10, 40, 90]:
+for k in [5]:
+# for k in [5, 10, 40]:
+ #measure(neighbors.KNeighborsClassifier, {'n_neighbors': k}, "%iNN" % k)
bias_variance_analysis(neighbors.KNeighborsClassifier, {
- 'n_neighbors': k, 'warn_on_equidistant': False}, "%iNN" % k)
- k_complexity_analysis(neighbors.KNeighborsClassifier, {'n_neighbors': k,
- 'warn_on_equidistant': False})
- # measure(neighbors.KNeighborsClassifier, {'n_neighbors': k, 'p': 2,
- #'warn_on_equidistant': False}, "%iNN" % k)
+ 'n_neighbors': k}, "%iNN" % k)
+ k_complexity_analysis(neighbors.KNeighborsClassifier, {'n_neighbors': k})
from sklearn.linear_model import LogisticRegression
-for C in [0.1]: # [0.01, 0.1, 1.0, 10.0]:
+for C in [0.1]:
+# for C in [0.01, 0.1, 1.0, 10.0]:
name = "LogReg C=%.2f" % C
bias_variance_analysis(LogisticRegression, {'penalty': 'l2', 'C': C}, name)
measure(LogisticRegression, {'penalty': 'l2', 'C': C}, name, plot=True)
diff --git a/ch05/data.py b/ch05/data.py
index c7b4f781..e3b7e4bb 100644
--- a/ch05/data.py
+++ b/ch05/data.py
@@ -7,9 +7,8 @@
import os
-# DATA_DIR = r"C:\pymlbook-data\ch05"
-DATA_DIR = r"/media/sf_C/pymlbook-data/ch05"
-CHART_DIR = os.path.join("..", "charts")
+DATA_DIR = "data" # put your posts-2012.xml into this directory
+CHART_DIR = "charts"
filtered = os.path.join(DATA_DIR, "filtered.tsv")
filtered_meta = os.path.join(DATA_DIR, "filtered-meta.json")
diff --git a/ch05/log_reg_example.py b/ch05/log_reg_example.py
index 875f4878..bfbbd227 100644
--- a/ch05/log_reg_example.py
+++ b/ch05/log_reg_example.py
@@ -5,6 +5,9 @@
#
# It is made available under the MIT License
+import os
+from data import CHART_DIR
+
import numpy as np
from scipy.stats import norm
@@ -35,7 +38,8 @@ def lr_model(clf, X):
pyplot.xlabel("feature value")
pyplot.ylabel("class")
pyplot.grid(True, linestyle='-', color='0.75')
-pyplot.savefig("log_reg_example_data.png", bbox_inches="tight")
+pyplot.savefig(
+ os.path.join(CHART_DIR, "log_reg_example_data.png"), bbox_inches="tight")
def lin_model(clf, X):
@@ -66,7 +70,8 @@ def lin_model(clf, X):
pyplot.ylabel("class")
pyplot.title("linear fit on additional data")
pyplot.grid(True, linestyle='-', color='0.75')
-pyplot.savefig("log_reg_log_linear_fit.png", bbox_inches="tight")
+pyplot.savefig(
+ os.path.join(CHART_DIR, "log_reg_log_linear_fit.png"), bbox_inches="tight")
pyplot.figure(figsize=(10, 4))
pyplot.xlim((-5, 20))
@@ -76,7 +81,8 @@ def lin_model(clf, X):
pyplot.xlabel("feature value")
pyplot.ylabel("class")
pyplot.grid(True, linestyle='-', color='0.75')
-pyplot.savefig("log_reg_example_fitted.png", bbox_inches="tight")
+pyplot.savefig(
+ os.path.join(CHART_DIR, "log_reg_example_fitted.png"), bbox_inches="tight")
X = np.arange(0, 1, 0.001)
pyplot.figure(figsize=(10, 4))
@@ -94,4 +100,5 @@ def lin_model(clf, X):
pyplot.xlabel("P")
pyplot.ylabel("log(odds) = log(P / (1-P))")
pyplot.grid(True, linestyle='-', color='0.75')
-pyplot.savefig("log_reg_log_odds.png", bbox_inches="tight")
+pyplot.savefig(
+ os.path.join(CHART_DIR, "log_reg_log_odds.png"), bbox_inches="tight")
diff --git a/ch05/so_xml_to_tsv.py b/ch05/so_xml_to_tsv.py
index 46559409..7ce5b150 100644
--- a/ch05/so_xml_to_tsv.py
+++ b/ch05/so_xml_to_tsv.py
@@ -10,6 +10,7 @@
# to a question that has been asked in 2011 or 2012.
#
+import sys
import os
import re
try:
@@ -24,14 +25,19 @@
from data import DATA_DIR
-filename = os.path.join(DATA_DIR, "posts-2011-12.xml")
+#filename = os.path.join(DATA_DIR, "posts-2011-12.xml")
+filename = os.path.join(DATA_DIR, "posts-2012.xml")
+print("Reading from xml %s" % filename)
filename_filtered = os.path.join(DATA_DIR, "filtered.tsv")
+print("Filtered: %s" % filename_filtered)
+filename_filtered_meta = os.path.join(DATA_DIR, "filtered-meta.json")
+print("Meta: %s" % filename_filtered_meta)
q_creation = {} # creation datetimes of questions
q_accepted = {} # id of accepted answer
-meta = defaultdict(
- list) # question -> [(answer Id, IsAccepted, TimeToAnswer, Score), ...]
+# question -> [(answer Id, IsAccepted, TimeToAnswer, Score), ...]
+meta = defaultdict(list)
 # regex to find code snippets
 code_match = re.compile('<pre>(.*?)</pre>', re.MULTILINE | re.DOTALL)
@@ -56,20 +62,19 @@ def filter_html(s):
# sometimes source code contain links, which we don't want to count
link_count_in_code += len(link_match.findall(match_str))
- anchors = link_match.findall(s)
- link_count = len(anchors)
+ links = link_match.findall(s)
+ link_count = len(links)
link_count -= link_count_in_code
- html_free_s = re.sub(
+ link_free_s = re.sub(
" +", " ", tag_match.sub('', code_free_s)).replace("\n", "")
- link_free_s = html_free_s
- for anchor in anchors:
- if anchor.lower().startswith("http://"):
- link_free_s = link_free_s.replace(anchor, '')
+ for link in links:
+ if link.lower().startswith("http://"):
+ link_free_s = link_free_s.replace(link, '')
- num_text_tokens = html_free_s.count(" ")
+ num_text_tokens = link_free_s.count(" ")
return link_free_s, num_text_tokens, num_code_lines, link_count, num_images
@@ -77,6 +82,12 @@ def filter_html(s):
num_questions = 0
num_answers = 0
+if sys.version_info.major < 3:
+ # Python 2, map() returns a list, which will lead to out of memory errors.
+ # The following import ensures that the script behaves like being executed
+ # with Python 3.
+ from itertools import imap as map
+
def parsexml(filename):
global num_questions, num_answers
@@ -85,11 +96,12 @@ def parsexml(filename):
it = map(itemgetter(1),
iter(etree.iterparse(filename, events=('start',))))
+
root = next(it) # get posts element
for elem in it:
if counter % 100000 == 0:
- print(counter)
+ print("Processed %i
elements" % counter)
counter += 1
@@ -138,19 +150,19 @@ def parsexml(filename):
values = (Id, ParentId,
IsAccepted,
TimeToAnswer, Score,
- Text,
+ Text.encode("utf-8"),
NumTextTokens, NumCodeLines, LinkCount, NumImages)
yield values
root.clear() # preserve memory
-with open(os.path.join(DATA_DIR, filename_filtered), "w") as f:
- for item in parsexml(filename):
- line = "\t".join(map(str, item))
- f.write(line.encode("utf-8") + "\n")
+with open(filename_filtered, "w") as f:
+ for values in parsexml(filename):
+ line = "\t".join(map(str, values))
+ f.write(line + "\n")
-with open(os.path.join(DATA_DIR, "filtered-meta.json"), "w") as f:
+with open(filename_filtered_meta, "w") as f:
json.dump(meta, f)
print("years:", years)
diff --git a/ch05/utils.py b/ch05/utils.py
index 03429cef..c6e497a5 100644
--- a/ch05/utils.py
+++ b/ch05/utils.py
@@ -171,8 +171,8 @@ def plot_feat_hist(data_name_list, filename=None):
assert filename is not None
pylab.figure(num=None, figsize=(8, 6))
- num_rows = 1 + (len(data_name_list) - 1) / 2
- num_cols = 1 if len(data_name_list) == 1 else 2
+ num_rows = int(1 + (len(data_name_list) - 1) / 2)
+ num_cols = int(1 if len(data_name_list) == 1 else 2)
pylab.figure(figsize=(5 * num_cols, 4 * num_rows))
for i in range(num_rows):
@@ -191,7 +191,7 @@ def plot_feat_hist(data_name_list, filename=None):
else:
bins = max_val
n, bins, patches = pylab.hist(
- x, bins=bins, normed=1, facecolor='blue', alpha=0.75)
+ x, bins=bins, normed=1, alpha=0.75)
pylab.grid(True)
@@ -209,7 +209,7 @@ def plot_bias_variance(data_sizes, train_errors, test_errors, name, title):
pylab.title("Bias-Variance for '%s'" % name)
pylab.plot(
data_sizes, test_errors, "--", data_sizes, train_errors, "b-", lw=1)
- pylab.legend(["train error", "test error"], loc="upper right")
+ pylab.legend(["test error", "train error"], loc="upper right")
pylab.grid(True, linestyle='-', color='0.75')
pylab.savefig(
os.path.join(CHART_DIR, "bv_" + name.replace(" ", "_") + ".png"), bbox_inches="tight")
@@ -220,10 +220,10 @@ def plot_k_complexity(ks, train_errors, test_errors):
pylab.ylim([0.0, 1.0])
pylab.xlabel('k')
pylab.ylabel('Error')
- pylab.title('Errors for for different values of k')
+    pylab.title('Errors for different values of $k$')
pylab.plot(
ks, test_errors, "--", ks, train_errors, "-", lw=1)
- pylab.legend(["train error", "test error"], loc="upper right")
+ pylab.legend(["test error", "train error"], loc="upper right")
pylab.grid(True, linestyle='-', color='0.75')
pylab.savefig(
os.path.join(CHART_DIR, "kcomplexity.png"), bbox_inches="tight")
diff --git a/ch06/01_start.py b/ch06/01_start.py
index 2940b0fa..0bdbdd12 100644
--- a/ch06/01_start.py
+++ b/ch06/01_start.py
@@ -40,7 +40,7 @@ def create_ngram_model():
def train_model(clf_factory, X, Y, name="NB ngram", plot=False):
cv = ShuffleSplit(
- n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)
+ n=len(X), n_iter=10, test_size=0.3, random_state=0)
train_errors = []
test_errors = []
@@ -83,7 +83,7 @@ def train_model(clf_factory, X, Y, name="NB ngram", plot=False):
summary = (np.mean(scores), np.std(scores),
np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors)
@@ -94,18 +94,18 @@ def print_incorrect(clf, X, Y):
X_wrong = X[wrong_idx]
Y_wrong = Y[wrong_idx]
Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
if __name__ == "__main__":
X_orig, Y_orig = load_sanders_data()
classes = np.unique(Y_orig)
for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
X = X_orig[pos_neg]
Y = Y_orig[pos_neg]
@@ -113,19 +113,19 @@ def print_incorrect(clf, X, Y):
train_model(create_ngram_model, X, Y, name="pos vs neg", plot=True)
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive", "negative"])
train_model(create_ngram_model, X, Y, name="sent vs rest", plot=True)
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive"])
train_model(create_ngram_model, X, Y, name="pos vs rest", plot=True)
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["negative"])
train_model(create_ngram_model, X, Y, name="neg vs rest", plot=True)
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/02_tuning.py b/ch06/02_tuning.py
index a48d3edc..5b6a835f 100644
--- a/ch06/02_tuning.py
+++ b/ch06/02_tuning.py
@@ -45,7 +45,7 @@ def create_ngram_model(params=None):
def grid_search_model(clf_factory, X, Y):
cv = ShuffleSplit(
- n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)
+ n=len(X), n_iter=10, test_size=0.3, random_state=0)
param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
vect__min_df=[1, 2],
@@ -64,7 +64,7 @@ def grid_search_model(clf_factory, X, Y):
verbose=10)
grid_search.fit(X, Y)
clf = grid_search.best_estimator_
- print clf
+ print(clf)
return clf
@@ -114,7 +114,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
summary = (np.mean(scores), np.std(scores),
np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors)
@@ -125,9 +125,9 @@ def print_incorrect(clf, X, Y):
X_wrong = X[wrong_idx]
Y_wrong = Y[wrong_idx]
Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
def get_best_model():
@@ -149,16 +149,16 @@ def get_best_model():
X_orig, Y_orig = load_sanders_data()
classes = np.unique(Y_orig)
for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
X = X_orig[pos_neg]
Y = Y_orig[pos_neg]
Y = tweak_labels(Y, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive", "negative"])
@@ -166,16 +166,16 @@ def get_best_model():
# rest", plot=True)
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs rest",
plot=True)
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["negative"])
train_model(get_best_model(), X, Y, name="neg vs rest",
plot=True)
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/03_clean.py b/ch06/03_clean.py
index f00c2c44..0170ed82 100644
--- a/ch06/03_clean.py
+++ b/ch06/03_clean.py
@@ -57,7 +57,7 @@
}
emo_repl_order = [k for (k_len, k) in reversed(
- sorted([(len(k), k) for k in emo_repl.keys()]))]
+ sorted([(len(k), k) for k in list(emo_repl.keys())]))]
re_repl = {
r"\br\b": "are",
@@ -84,7 +84,7 @@ def preprocessor(tweet):
for k in emo_repl_order:
tweet = tweet.replace(k, emo_repl[k])
- for r, repl in re_repl.iteritems():
+ for r, repl in re_repl.items():
tweet = re.sub(r, repl, tweet)
return tweet
@@ -103,7 +103,7 @@ def preprocessor(tweet):
def train_model(clf, X, Y, name="NB ngram", plot=False):
# create it again for plotting
cv = ShuffleSplit(
- n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)
+ n=len(X), n_iter=10, test_size=0.3, random_state=0)
train_errors = []
test_errors = []
@@ -150,7 +150,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
summary = (np.mean(scores), np.std(scores),
np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors)
@@ -161,9 +161,9 @@ def print_incorrect(clf, X, Y):
X_wrong = X[wrong_idx]
Y_wrong = Y[wrong_idx]
Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
def get_best_model():
@@ -185,16 +185,16 @@ def get_best_model():
X_orig, Y_orig = load_sanders_data()
classes = np.unique(Y_orig)
for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
X = X_orig[pos_neg]
Y = Y_orig[pos_neg]
Y = tweak_labels(Y, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive", "negative"])
@@ -202,16 +202,16 @@ def get_best_model():
# rest", plot=True)
train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs rest",
plot=True)
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["negative"])
train_model(get_best_model(), X, Y, name="neg vs rest",
plot=True)
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
diff --git a/ch06/04_sent.py b/ch06/04_sent.py
index 87fbafc9..c09a435f 100644
--- a/ch06/04_sent.py
+++ b/ch06/04_sent.py
@@ -153,7 +153,7 @@ def transform(self, documents):
}
emo_repl_order = [k for (k_len, k) in reversed(
- sorted([(len(k), k) for k in emo_repl.keys()]))]
+ sorted([(len(k), k) for k in list(emo_repl.keys())]))]
re_repl = {
r"\br\b": "are",
@@ -179,7 +179,7 @@ def preprocessor(tweet):
for k in emo_repl_order:
tweet = tweet.replace(k, emo_repl[k])
- for r, repl in re_repl.iteritems():
+ for r, repl in re_repl.items():
tweet = re.sub(r, repl, tweet)
return tweet.replace("-", " ").replace("_", " ")
@@ -202,7 +202,7 @@ def preprocessor(tweet):
def __grid_search_model(clf_factory, X, Y):
cv = ShuffleSplit(
- n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)
+ n=len(X), n_iter=10, test_size=0.3, random_state=0)
param_grid = dict(vect__ngram_range=[(1, 1), (1, 2), (1, 3)],
vect__min_df=[1, 2],
@@ -220,7 +220,7 @@ def __grid_search_model(clf_factory, X, Y):
verbose=10)
grid_search.fit(X, Y)
clf = grid_search.best_estimator_
- print clf
+ print(clf)
return clf
@@ -228,7 +228,7 @@ def __grid_search_model(clf_factory, X, Y):
def train_model(clf, X, Y, name="NB ngram", plot=False):
# create it again for plotting
cv = ShuffleSplit(
- n=len(X), n_iter=10, test_size=0.3, indices=True, random_state=0)
+ n=len(X), n_iter=10, test_size=0.3, random_state=0)
train_errors = []
test_errors = []
@@ -275,7 +275,7 @@ def train_model(clf, X, Y, name="NB ngram", plot=False):
summary = (np.mean(scores), np.std(scores),
np.mean(pr_scores), np.std(pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors)
@@ -286,9 +286,9 @@ def print_incorrect(clf, X, Y):
X_wrong = X[wrong_idx]
Y_wrong = Y[wrong_idx]
Y_hat_wrong = Y_hat[wrong_idx]
- for idx in xrange(len(X_wrong)):
- print "clf.predict('%s')=%i instead of %i" %\
- (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx])
+ for idx in range(len(X_wrong)):
+ print("clf.predict('%s')=%i instead of %i" %
+ (X_wrong[idx], Y_hat_wrong[idx], Y_wrong[idx]))
def get_best_model():
@@ -315,16 +315,16 @@ def get_best_model():
#Y_orig = Y_orig[:100,]
classes = np.unique(Y_orig)
for c in classes:
- print "#%s: %i" % (c, sum(Y_orig == c))
+ print("#%s: %i" % (c, sum(Y_orig == c)))
- print "== Pos vs. neg =="
+ print("== Pos vs. neg ==")
pos_neg = np.logical_or(Y_orig == "positive", Y_orig == "negative")
X = X_orig[pos_neg]
Y = Y_orig[pos_neg]
Y = tweak_labels(Y, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs neg", plot=True)
- print "== Pos/neg vs. irrelevant/neutral =="
+ print("== Pos/neg vs. irrelevant/neutral ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive", "negative"])
@@ -332,18 +332,18 @@ def get_best_model():
# rest", plot=True)
train_model(get_best_model(), X, Y, name="pos+neg vs rest", plot=True)
- print "== Pos vs. rest =="
+ print("== Pos vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["positive"])
train_model(get_best_model(), X, Y, name="pos vs rest",
plot=True)
- print "== Neg vs. rest =="
+ print("== Neg vs. rest ==")
X = X_orig
Y = tweak_labels(Y_orig, ["negative"])
train_model(get_best_model(), X, Y, name="neg vs rest",
plot=True)
- print "time spent:", time.time() - start_time
+ print("time spent:", time.time() - start_time)
json.dump(poscache, open(poscache_filename, "w"))
diff --git a/ch06/README.md b/ch06/README.md
new file mode 100644
index 00000000..91e67f35
--- /dev/null
+++ b/ch06/README.md
@@ -0,0 +1,14 @@
+Chapter 6 - Classification II - Sentiment Analysis
+==================================================
+
+When we did the last code sanity checks for the book, Twitter
+was still using API version 1.0, which did not require authentication.
+With the switch to version 1.1, this has changed.
+
+If you have not already created your personal Twitter
+access keys and tokens, you can do so at
+[https://dev.twitter.com/docs/auth/tokens-devtwittercom](https://dev.twitter.com/docs/auth/tokens-devtwittercom); then paste the keys/secrets into twitterauth.py.
+
+According to [https://dev.twitter.com/docs/rate-limiting/1](https://dev.twitter.com/docs/rate-limiting/1), Twitter limits authorized users to fetching 350 tweets per hour.
+
+Note that some tweets might be missing when you run install.py (the user got suspended or changed authorization, or the tweet was deleted), so you might get different results. We keep track of those tweet IDs in data/{missing,not_authorized}.tsv so that they are not fetched again when you run install.py.
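+
+For reference, twitterauth.py is just a small module holding your credentials. A minimal sketch (the variable names here are illustrative placeholders; keep whatever names install.py actually imports):
+
+```python
+# twitterauth.py -- illustrative placeholder names, fill in your own values
+CONSUMER_KEY = "<your consumer key>"
+CONSUMER_SECRET = "<your consumer secret>"
+ACCESS_TOKEN_KEY = "<your access token>"
+ACCESS_TOKEN_SECRET = "<your access token secret>"
+```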
diff --git a/ch06/data/corpus.csv b/ch06/data/corpus.csv
new file mode 100755
index 00000000..3c2b6926
--- /dev/null
+++ b/ch06/data/corpus.csv
@@ -0,0 +1,5513 @@
+"apple","positive","126415614616154112"
+"apple","positive","126404574230740992"
+"apple","positive","126402758403305474"
+"apple","positive","126397179614068736"
+"apple","positive","126395626979196928"
+"apple","positive","126394830791254016"
+"apple","positive","126379685453119488"
+"apple","positive","126377656416612353"
+"apple","positive","126373779483004928"
+"apple","positive","126366353757179904"
+"apple","positive","126366123368267776"
+"apple","positive","126365858481188864"
+"apple","positive","126360935509135362"
+"apple","positive","126360398885687296"
+"apple","positive","126358340220616704"
+"apple","positive","126357982685569024"
+"apple","positive","126354605130002432"
+"apple","positive","126352268705538048"
+"apple","positive","126350948548354048"
+"apple","positive","126350302113824769"
+"apple","positive","126349695676203009"
+"apple","positive","126344048637259776"
+"apple","positive","126342268603998208"
+"apple","positive","126325800080392193"
+"apple","positive","126324177501302784"
+"apple","positive","126323785145126912"
+"apple","positive","126322063332999169"
+"apple","positive","126319186141130752"
+"apple","positive","126318009647235072"
+"apple","positive","126315223060709376"
+"apple","positive","126315011600678913"
+"apple","positive","126314687116750849"
+"apple","positive","126312877916307458"
+"apple","positive","126311981564178432"
+"apple","positive","126307801046847488"
+"apple","positive","126302673820594176"
+"apple","positive","126301956951117826"
+"apple","positive","126287654093471745"
+"apple","positive","126284506360578049"
+"apple","positive","126267185025916928"
+"apple","positive","126263834968211456"
+"apple","positive","126256230397259776"
+"apple","positive","126213333123743744"
+"apple","positive","126195522691280896"
+"apple","positive","126183339945234432"
+"apple","positive","126180209501286400"
+"apple","positive","126164430546403328"
+"apple","positive","126148685737361408"
+"apple","positive","126140794078892033"
+"apple","positive","126134400466419712"
+"apple","positive","126130991365500928"
+"apple","positive","126116898051076096"
+"apple","positive","126116614495154176"
+"apple","positive","126112836219973632"
+"apple","positive","126107965991297024"
+"apple","positive","126104732426186752"
+"apple","positive","126097426493878272"
+"apple","positive","126095744531832832"
+"apple","positive","126093298619252737"
+"apple","positive","126084907343691776"
+"apple","positive","126079414986485761"
+"apple","positive","126076743613284354"
+"apple","positive","126076238375817216"
+"apple","positive","126075534894571520"
+"apple","positive","126064519943426048"
+"apple","positive","126063569660936193"
+"apple","positive","126063358037340161"
+"apple","positive","126059405941809152"
+"apple","positive","126059399319003136"
+"apple","positive","126057030996852737"
+"apple","positive","126049183865114624"
+"apple","positive","126040352237961217"
+"apple","positive","126040074595999746"
+"apple","positive","126039929523404801"
+"apple","positive","126034495991328768"
+"apple","positive","126026756623831041"
+"apple","positive","126019393460244481"
+"apple","positive","126015087386431488"
+"apple","positive","126009748020658177"
+"apple","positive","126008369562652672"
+"apple","positive","126002597063696384"
+"apple","positive","125999676972470272"
+"apple","positive","125995158679461888"
+"apple","positive","125979338846900224"
+"apple","positive","125978568726560768"
+"apple","positive","125978473712979969"
+"apple","positive","125974505385500672"
+"apple","positive","125960325437722624"
+"apple","positive","125959059957485569"
+"apple","positive","125954443643588608"
+"apple","positive","125947912306954240"
+"apple","positive","125947232359948288"
+"apple","positive","125943290288803841"
+"apple","positive","125940394566483968"
+"apple","positive","125932869389524992"
+"apple","positive","125930171562852353"
+"apple","positive","125925618486489088"
+"apple","positive","125924446430183425"
+"apple","positive","125922999651139584"
+"apple","positive","125910633731461120"
+"apple","positive","125909565031198720"
+"apple","positive","125907732388790272"
+"apple","positive","125902301931126785"
+"apple","positive","125901202591461376"
+"apple","positive","125900497327636480"
+"apple","positive","125898611572740097"
+"apple","positive","125850288488841217"
+"apple","positive","125840039031738368"
+"apple","positive","125816853867151360"
+"apple","positive","125794931439702016"
+"apple","positive","125728717942161408"
+"apple","positive","125727629012770816"
+"apple","positive","125722746100531200"
+"apple","positive","125717622728818688"
+"apple","positive","125714253452812288"
+"apple","positive","125713935344214016"
+"apple","positive","125712433087123456"
+"apple","positive","125708639607599104"
+"apple","positive","125706813583798274"
+"apple","positive","125701161926930433"
+"apple","positive","125699573799845888"
+"apple","positive","125688922410975232"
+"apple","positive","125685656415510528"
+"apple","positive","125681742760771584"
+"apple","positive","125680049478316032"
+"apple","positive","125677424565424128"
+"apple","positive","125673004511412224"
+"apple","positive","125667241978114048"
+"apple","positive","125665606853861376"
+"apple","positive","125664375364255744"
+"apple","positive","125662399217930240"
+"apple","positive","125652668080336896"
+"apple","positive","125648027045199873"
+"apple","positive","125645258003464192"
+"apple","positive","125643107260829697"
+"apple","positive","125633677597229056"
+"apple","positive","125633065757310976"
+"apple","positive","125628199269961729"
+"apple","positive","125623745284018176"
+"apple","positive","125618466353983488"
+"apple","positive","125616280215617537"
+"apple","positive","125610372727193601"
+"apple","positive","125608381431025664"
+"apple","positive","125596541028282369"
+"apple","positive","125595292304281601"
+"apple","positive","125585606100267008"
+"apple","positive","125562428200202240"
+"apple","positive","125561950376701952"
+"apple","positive","125550135911518209"
+"apple","positive","125547297072357376"
+"apple","positive","125539788546781185"
+"apple","positive","125537993942515712"
+"apple","positive","125537578974851072"
+"apple","positive","125536884813336576"
+"apple","positive","125533599737978882"
+"apple","positive","125524107386302465"
+"apple","positive","125523414298533888"
+"apple","positive","125501576952553472"
+"apple","positive","125501281753251840"
+"apple","positive","125495491701125120"
+"apple","positive","125459338524499969"
+"apple","positive","125458901192810496"
+"apple","positive","125455260801179648"
+"apple","positive","125445056218923008"
+"apple","positive","125423290767507456"
+"apple","positive","125422502284505088"
+"apple","positive","125416879035658240"
+"apple","positive","125407532893224962"
+"apple","positive","125402636764712960"
+"apple","positive","125402412147146752"
+"apple","positive","125393816470568961"
+"apple","positive","125374540107886593"
+"apple","positive","125356807626559488"
+"apple","positive","125343429289984000"
+"apple","positive","125338216411828224"
+"apple","positive","125333598197911552"
+"apple","positive","125330595302744064"
+"apple","positive","125319163366473728"
+"apple","positive","125313088160411649"
+"apple","positive","125309946723188736"
+"apple","positive","125281706327552001"
+"apple","positive","125279987254300672"
+"apple","positive","125279447669669888"
+"apple","positive","125264731035537409"
+"apple","positive","125256305647693825"
+"apple","positive","125252188065902592"
+"apple","positive","125251672896323584"
+"apple","positive","125243911538098176"
+"apple","positive","125238977417580544"
+"apple","positive","125238017299451905"
+"apple","positive","125165176772247552"
+"apple","negative","126418790706712576"
+"apple","negative","126417285559762944"
+"apple","negative","126416915664084992"
+"apple","negative","126416109212680192"
+"apple","negative","126411162622496768"
+"apple","negative","126410591949697024"
+"apple","negative","126409696553861121"
+"apple","negative","126408864387182593"
+"apple","negative","126408052525105153"
+"apple","negative","126407767132078082"
+"apple","negative","126405405667627008"
+"apple","negative","126405185630253056"
+"apple","negative","126405040809312256"
+"apple","negative","126400637930979329"
+"apple","negative","126394680903614465"
+"apple","negative","126393717421645825"
+"apple","negative","126393204550537216"
+"apple","negative","126392402083708928"
+"apple","negative","126391082308206593"
+"apple","negative","126389218284015616"
+"apple","negative","126388194194362369"
+"apple","negative","126388023725268992"
+"apple","negative","126385036441296896"
+"apple","negative","126382959711358976"
+"apple","negative","126382051661328385"
+"apple","negative","126381519513194497"
+"apple","negative","126380588822298625"
+"apple","negative","126380553464315904"
+"apple","negative","126377298650861568"
+"apple","negative","126375381249966080"
+"apple","negative","126372694118768640"
+"apple","negative","126372040696541184"
+"apple","negative","126363154837020672"
+"apple","negative","126361483432038400"
+"apple","negative","126358781633368064"
+"apple","negative","126358301393956866"
+"apple","negative","126358272084152320"
+"apple","negative","126357580741226496"
+"apple","negative","126357227727626240"
+"apple","negative","126354628999778305"
+"apple","negative","126351972948393984"
+"apple","negative","126348169826148352"
+"apple","negative","126344426854416385"
+"apple","negative","126343931117047808"
+"apple","negative","126343679785959424"
+"apple","negative","126343214805426176"
+"apple","negative","126334597431697408"
+"apple","negative","126334188583530496"
+"apple","negative","126331480233353216"
+"apple","negative","126331327271284736"
+"apple","negative","126328782700285952"
+"apple","negative","126327808803880960"
+"apple","negative","126325125749542913"
+"apple","negative","126324573384871936"
+"apple","negative","126324389741473792"
+"apple","negative","126324077513293824"
+"apple","negative","126321169468100609"
+"apple","negative","126320033369563138"
+"apple","negative","126312535203921920"
+"apple","negative","126311879218966529"
+"apple","negative","126310645443461121"
+"apple","negative","126309616391950336"
+"apple","negative","126308005779210241"
+"apple","negative","126307071984545793"
+"apple","negative","126304942049853441"
+"apple","negative","126302386644975616"
+"apple","negative","126299379832336384"
+"apple","negative","126297326565330944"
+"apple","negative","126297241190281216"
+"apple","negative","126295434862936064"
+"apple","negative","126292335540699136"
+"apple","negative","126286814578348032"
+"apple","negative","126283602571964416"
+"apple","negative","126282994821509120"
+"apple","negative","126281019476291585"
+"apple","negative","126280555980529664"
+"apple","negative","126279811151831042"
+"apple","negative","126270073420791810"
+"apple","negative","126264313563459585"
+"apple","negative","126258214412091392"
+"apple","negative","126257645282799616"
+"apple","negative","126251052667375616"
+"apple","negative","126247557339947008"
+"apple","negative","126243680129523712"
+"apple","negative","126240605419487232"
+"apple","negative","126239832895795200"
+"apple","negative","126238223537152001"
+"apple","negative","126221894126022656"
+"apple","negative","126189036644728832"
+"apple","negative","126188946974720000"
+"apple","negative","126188717902802944"
+"apple","negative","126188686453907457"
+"apple","negative","126182880123695104"
+"apple","negative","126167083334643713"
+"apple","negative","126163315499081728"
+"apple","negative","126163250172801024"
+"apple","negative","126158846375903233"
+"apple","negative","126156590662422528"
+"apple","negative","126155291288023040"
+"apple","negative","126153311521996800"
+"apple","negative","126150581558591488"
+"apple","negative","126148955217203200"
+"apple","negative","126148565302128640"
+"apple","negative","126143926523539457"
+"apple","negative","126141380409036800"
+"apple","negative","126141077131497472"
+"apple","negative","126140389873827841"
+"apple","negative","126132919117938689"
+"apple","negative","126131535211536384"
+"apple","negative","126129938247061504"
+"apple","negative","126129582326816769"
+"apple","negative","126128599030956032"
+"apple","negative","126127465155403777"
+"apple","negative","126126605344047105"
+"apple","negative","126121175926571009"
+"apple","negative","126118222746497025"
+"apple","negative","126106964420857857"
+"apple","negative","126099775417364480"
+"apple","negative","126096173198082048"
+"apple","negative","126094194312876032"
+"apple","negative","126089287660863488"
+"apple","negative","126088404084588546"
+"apple","negative","126085893353250816"
+"apple","negative","126084068298334208"
+"apple","negative","126082198720888833"
+"apple","negative","126082123743502336"
+"apple","negative","126079672386723840"
+"apple","negative","126075115686465536"
+"apple","negative","126073788323479552"
+"apple","negative","126073520504569858"
+"apple","negative","126072901144281088"
+"apple","negative","126070647125327872"
+"apple","negative","126069614181486593"
+"apple","negative","126068917012668416"
+"apple","negative","126063215842037760"
+"apple","negative","126060639268507649"
+"apple","negative","126054048972537856"
+"apple","negative","126053722966069248"
+"apple","negative","126050114518261760"
+"apple","negative","126044756320075776"
+"apple","negative","126044425964109824"
+"apple","negative","126042137740574720"
+"apple","negative","126042022900547584"
+"apple","negative","126037831301869568"
+"apple","negative","126036793970786304"
+"apple","negative","126034507475337216"
+"apple","negative","126033747991736320"
+"apple","negative","126031969166434304"
+"apple","negative","126030936084189184"
+"apple","negative","126029733325582336"
+"apple","negative","126021108641181696"
+"apple","negative","126018120983904256"
+"apple","negative","126016585348558848"
+"apple","negative","126014999444467712"
+"apple","negative","126014540721827840"
+"apple","negative","126012822936231936"
+"apple","negative","126012404332113920"
+"apple","negative","126012089415380992"
+"apple","negative","126008913400303616"
+"apple","negative","126006966312108032"
+"apple","negative","126006116168642560"
+"apple","negative","126006088725303296"
+"apple","negative","126004661248471040"
+"apple","negative","126003967552524288"
+"apple","negative","126001775626031105"
+"apple","negative","126001635162992640"
+"apple","negative","125999655011098624"
+"apple","negative","125996379913986048"
+"apple","negative","125995264325599233"
+"apple","negative","125994965183635456"
+"apple","negative","125994596336533504"
+"apple","negative","125989051101741056"
+"apple","negative","125988395787882497"
+"apple","negative","125987979784224770"
+"apple","negative","125987439692099584"
+"apple","negative","125982320917364736"
+"apple","negative","125980659415138304"
+"apple","negative","125978454146551808"
+"apple","negative","125976113657823232"
+"apple","negative","125974886006005760"
+"apple","negative","125974810021998595"
+"apple","negative","125974351035117568"
+"apple","negative","125973390283653120"
+"apple","negative","125969932285513728"
+"apple","negative","125969502587465729"
+"apple","negative","125966385259098112"
+"apple","negative","125965988146585601"
+"apple","negative","125965853769478144"
+"apple","negative","125964314220830722"
+"apple","negative","125963262733991936"
+"apple","negative","125961999791308800"
+"apple","negative","125961793926475776"
+"apple","negative","125960026891362304"
+"apple","negative","125958961269702656"
+"apple","negative","125958368773943296"
+"apple","negative","125956505768960000"
+"apple","negative","125956403574747137"
+"apple","negative","125956319344721920"
+"apple","negative","125954651152592896"
+"apple","negative","125953600861126656"
+"apple","negative","125950557310562305"
+"apple","negative","125945821240885248"
+"apple","negative","125943204943114240"
+"apple","negative","125937228328341504"
+"apple","negative","125934808592433153"
+"apple","negative","125930406125117440"
+"apple","negative","125929899071516676"
+"apple","negative","125929395264299009"
+"apple","negative","125920912171216896"
+"apple","negative","125876542600519681"
+"apple","negative","125863232249405440"
+"apple","negative","125846659182764032"
+"apple","negative","125845538926112768"
+"apple","negative","125836461936361472"
+"apple","negative","125826259048607744"
+"apple","negative","125824148579692544"
+"apple","negative","125824054958637056"
+"apple","negative","125822115155947520"
+"apple","negative","125819194049699840"
+"apple","negative","125815370513793024"
+"apple","negative","125814380871946240"
+"apple","negative","125812985301172224"
+"apple","negative","125811345064067072"
+"apple","negative","125807830363156480"
+"apple","negative","125806568389361664"
+"apple","negative","125806240138928128"
+"apple","negative","125799384976863232"
+"apple","negative","125794882257305600"
+"apple","negative","125794703819030528"
+"apple","negative","125731810733867011"
+"apple","negative","125729727653756928"
+"apple","negative","125728250579259392"
+"apple","negative","125722610179907584"
+"apple","negative","125722107710672896"
+"apple","negative","125717447276904448"
+"apple","negative","125717161531551744"
+"apple","negative","125713100782575616"
+"apple","negative","125712104253702146"
+"apple","negative","125711996074209280"
+"apple","negative","125710089716899840"
+"apple","negative","125708348237680640"
+"apple","negative","125706246056706049"
+"apple","negative","125701785540235264"
+"apple","negative","125694815743651840"
+"apple","negative","125692685033021441"
+"apple","negative","125692532750430209"
+"apple","negative","125691072398639104"
+"apple","negative","125689691927351296"
+"apple","negative","125681375058735104"
+"apple","negative","125681125376000000"
+"apple","negative","125679166015283203"
+"apple","negative","125675806977556480"
+"apple","negative","125673358418391041"
+"apple","negative","125665094561574913"
+"apple","negative","125664507757461504"
+"apple","negative","125663477573500930"
+"apple","negative","125661140939321344"
+"apple","negative","125659125886623744"
+"apple","negative","125657359841361920"
+"apple","negative","125656618326175745"
+"apple","negative","125656559190683651"
+"apple","negative","125654540455378945"
+"apple","negative","125651769261965312"
+"apple","negative","125649285667749889"
+"apple","negative","125645811903250432"
+"apple","negative","125643054190305280"
+"apple","negative","125642742977138689"
+"apple","negative","125641051531784192"
+"apple","negative","125639217090011136"
+"apple","negative","125633065878958080"
+"apple","negative","125631239364427776"
+"apple","negative","125626166492147713"
+"apple","negative","125621144148639744"
+"apple","negative","125619303356710912"
+"apple","negative","125607492356018176"
+"apple","negative","125603435440644098"
+"apple","negative","125601235985367041"
+"apple","negative","125599423131697154"
+"apple","negative","125598450090917888"
+"apple","negative","125588697872728065"
+"apple","negative","125588202286366721"
+"apple","negative","125586348064247808"
+"apple","negative","125583385895768064"
+"apple","negative","125578269197217792"
+"apple","negative","125561930416013312"
+"apple","negative","125556679571025920"
+"apple","negative","125547255947198465"
+"apple","negative","125544764203466752"
+"apple","negative","125533730222784512"
+"apple","negative","125521682894041088"
+"apple","negative","125510333078048768"
+"apple","negative","125476730067615744"
+"apple","negative","125475953509015552"
+"apple","negative","125471372485992448"
+"apple","negative","125464229577891840"
+"apple","negative","125458395800154112"
+"apple","negative","125442137302110208"
+"apple","negative","125441732941840385"
+"apple","negative","125441478951575552"
+"apple","negative","125435218017525760"
+"apple","negative","125420263687995392"
+"apple","negative","125408962215555072"
+"apple","negative","125408737296003072"
+"apple","negative","125408701166256128"
+"apple","negative","125407447383937025"
+"apple","negative","125406743923671040"
+"apple","negative","125405260650000384"
+"apple","negative","125405005493706752"
+"apple","negative","125404317669785600"
+"apple","negative","125400161886277632"
+"apple","negative","125399780527570944"
+"apple","negative","125395636219678720"
+"apple","negative","125394863255588864"
+"apple","negative","125394746452619265"
+"apple","negative","125394663573172224"
+"apple","negative","125380163302199296"
+"apple","negative","125371779039502336"
+"apple","negative","125369698840887297"
+"apple","negative","125368089159286784"
+"apple","negative","125365852487942145"
+"apple","negative","125365814579826688"
+"apple","negative","125365581170999296"
+"apple","negative","125355869859876864"
+"apple","negative","125355139409252352"
+"apple","negative","125347619072512000"
+"apple","negative","125346783390990337"
+"apple","negative","125341902739484672"
+"apple","negative","125341804857008128"
+"apple","negative","125338210158125056"
+"apple","negative","125336335656558592"
+"apple","negative","125334948017213441"
+"apple","negative","125334519254482944"
+"apple","negative","125333948556521472"
+"apple","negative","125330337847975937"
+"apple","negative","125330038248849408"
+"apple","negative","125329867674886144"
+"apple","negative","125327896066785280"
+"apple","negative","125313086465904641"
+"apple","negative","125311989751877632"
+"apple","negative","125309975881977857"
+"apple","negative","125309448108519424"
+"apple","negative","125309427422203904"
+"apple","negative","125307394640199680"
+"apple","negative","125305396842856448"
+"apple","negative","125304159581900800"
+"apple","negative","125303217214062592"
+"apple","negative","125302079752384512"
+"apple","negative","125301860256063488"
+"apple","negative","125301393560047616"
+"apple","negative","125301265700892672"
+"apple","negative","125295729139908608"
+"apple","negative","125294978623746048"
+"apple","negative","125277260872822786"
+"apple","negative","125276004817190914"
+"apple","negative","125271422431014914"
+"apple","negative","125269239207706624"
+"apple","negative","125269161327865856"
+"apple","negative","125268117680160768"
+"apple","negative","125267178336419840"
+"apple","negative","125265721281351680"
+"apple","negative","125261285083447296"
+"apple","negative","125247130762883072"
+"apple","negative","125245780192792576"
+"apple","negative","125245246136258561"
+"apple","negative","125245104859529216"
+"apple","negative","125236708403970048"
+"apple","negative","125236166151774208"
+"apple","negative","125232266849947648"
+"apple","negative","125230743990444032"
+"apple","negative","125230107580317696"
+"apple","negative","125227837438435328"
+"apple","negative","125224588253741056"
+"apple","negative","125223685194915840"
+"apple","negative","125212404299735040"
+"apple","negative","125204228967903232"
+"apple","negative","125202037293064192"
+"apple","negative","125129328446017536"
+"apple","neutral","126417484017451009"
+"apple","neutral","126415742177513472"
+"apple","neutral","126415618625912832"
+"apple","neutral","126414657836687362"
+"apple","neutral","126410146703351808"
+"apple","neutral","126409984836763648"
+"apple","neutral","126407959495442432"
+"apple","neutral","126407672521162753"
+"apple","neutral","126407511531192320"
+"apple","neutral","126405911697817600"
+"apple","neutral","126405821482532864"
+"apple","neutral","126405160934178816"
+"apple","neutral","126403530838913024"
+"apple","neutral","126401882766839811"
+"apple","neutral","126400491067416576"
+"apple","neutral","126394795802370049"
+"apple","neutral","126393452324855808"
+"apple","neutral","126389413054910464"
+"apple","neutral","126387460463788032"
+"apple","neutral","126386085164101634"
+"apple","neutral","126384526925639681"
+"apple","neutral","126383125059211265"
+"apple","neutral","126382776072146944"
+"apple","neutral","126381578975842304"
+"apple","neutral","126380323733909504"
+"apple","neutral","126379730827083776"
+"apple","neutral","126377120023842816"
+"apple","neutral","126375024595705856"
+"apple","neutral","126374630377275392"
+"apple","neutral","126370776013213697"
+"apple","neutral","126368680459251712"
+"apple","neutral","126368285259350017"
+"apple","neutral","126368127524159488"
+"apple","neutral","126364189097865216"
+"apple","neutral","126362867778859008"
+"apple","neutral","126360821419884544"
+"apple","neutral","126360182308618240"
+"apple","neutral","126358012343492608"
+"apple","neutral","126357527196741632"
+"apple","neutral","126353359962775552"
+"apple","neutral","126351669029126144"
+"apple","neutral","126348857071239168"
+"apple","neutral","126347890196103168"
+"apple","neutral","126346633721032705"
+"apple","neutral","126346584068861952"
+"apple","neutral","126343124174901248"
+"apple","neutral","126339328434651136"
+"apple","neutral","126336867477094400"
+"apple","neutral","126336687382081536"
+"apple","neutral","126331354718801921"
+"apple","neutral","126330974270271488"
+"apple","neutral","126330155441467392"
+"apple","neutral","126329388320043008"
+"apple","neutral","126329109759524865"
+"apple","neutral","126328424624160768"
+"apple","neutral","126326886354784256"
+"apple","neutral","126325069281624064"
+"apple","neutral","126324621279641601"
+"apple","neutral","126324256236765185"
+"apple","neutral","126323574989520896"
+"apple","neutral","126323533696614402"
+"apple","neutral","126321197062426624"
+"apple","neutral","126320247379730432"
+"apple","neutral","126320076063379456"
+"apple","neutral","126318553031917569"
+"apple","neutral","126317201962700800"
+"apple","neutral","126316594971422720"
+"apple","neutral","126316179429134336"
+"apple","neutral","126315088641658881"
+"apple","neutral","126314701721309184"
+"apple","neutral","126312423132102657"
+"apple","neutral","126311681126187008"
+"apple","neutral","126311223343058946"
+"apple","neutral","126309939961536513"
+"apple","neutral","126308556294205441"
+"apple","neutral","126307117274644480"
+"apple","neutral","126302918797312000"
+"apple","neutral","126302719882444801"
+"apple","neutral","126301989486342145"
+"apple","neutral","126301301511426049"
+"apple","neutral","126300596633481216"
+"apple","neutral","126300304600866816"
+"apple","neutral","126298592364331008"
+"apple","neutral","126294550535872512"
+"apple","neutral","126294304628019201"
+"apple","neutral","126293879166205952"
+"apple","neutral","126293725155569664"
+"apple","neutral","126292279009882113"
+"apple","neutral","126292233963053056"
+"apple","neutral","126292109727768576"
+"apple","neutral","126291860305100801"
+"apple","neutral","126290154737504256"
+"apple","neutral","126289716097196032"
+"apple","neutral","126287512296632320"
+"apple","neutral","126283761754185728"
+"apple","neutral","126283639959990274"
+"apple","neutral","126283441657495552"
+"apple","neutral","126283440457912320"
+"apple","neutral","126283024278110208"
+"apple","neutral","126281432644595713"
+"apple","neutral","126280507729260544"
+"apple","neutral","126279672433614848"
+"apple","neutral","126277810431074304"
+"apple","neutral","126272713332506624"
+"apple","neutral","126267746739699713"
+"apple","neutral","126257394622808064"
+"apple","neutral","126243528832593920"
+"apple","neutral","126232037492404224"
+"apple","neutral","126229089651654656"
+"apple","neutral","126225922159427584"
+"apple","neutral","126219340214304768"
+"apple","neutral","126218596786511873"
+"apple","neutral","126217194173501441"
+"apple","neutral","126213965817708544"
+"apple","neutral","126211975595311104"
+"apple","neutral","126209902241787904"
+"apple","neutral","126205800359280640"
+"apple","neutral","126201991125929984"
+"apple","neutral","126197405015220225"
+"apple","neutral","126192452297170945"
+"apple","neutral","126186955296878592"
+"apple","neutral","126186795808456704"
+"apple","neutral","126185114173583360"
+"apple","neutral","126177221571395584"
+"apple","neutral","126171911523794944"
+"apple","neutral","126165547154018304"
+"apple","neutral","126163403063570432"
+"apple","neutral","126157019072835584"
+"apple","neutral","126149567036137473"
+"apple","neutral","126148184358653954"
+"apple","neutral","126147867478982656"
+"apple","neutral","126146495396319232"
+"apple","neutral","126141631291326464"
+"apple","neutral","126141628372090880"
+"apple","neutral","126141580682854400"
+"apple","neutral","126141157146238976"
+"apple","neutral","126134865887363072"
+"apple","neutral","126130171404230656"
+"apple","neutral","126125830094061568"
+"apple","neutral","126125799744094208"
+"apple","neutral","126125182405447680"
+"apple","neutral","126124917992341504"
+"apple","neutral","126122372775415808"
+"apple","neutral","126118389591711744"
+"apple","neutral","126113944891949056"
+"apple","neutral","126110863550717952"
+"apple","neutral","126110770864979968"
+"apple","neutral","126110374549405696"
+"apple","neutral","126109969912311810"
+"apple","neutral","126107127231152129"
+"apple","neutral","126106914684796928"
+"apple","neutral","126105236229193728"
+"apple","neutral","126105175294357505"
+"apple","neutral","126104490511319041"
+"apple","neutral","126104322999197696"
+"apple","neutral","126104244402126848"
+"apple","neutral","126102213956337664"
+"apple","neutral","126102037057388544"
+"apple","neutral","126098699196698624"
+"apple","neutral","126095965391298560"
+"apple","neutral","126094392183357443"
+"apple","neutral","126094029015355392"
+"apple","neutral","126094027140513792"
+"apple","neutral","126091878469869568"
+"apple","neutral","126089815136538624"
+"apple","neutral","126089347639427072"
+"apple","neutral","126087892580827137"
+"apple","neutral","126086553415057408"
+"apple","neutral","126082898783780864"
+"apple","neutral","126081812236738560"
+"apple","neutral","126079649959772160"
+"apple","neutral","126078565346312192"
+"apple","neutral","126073142107045888"
+"apple","neutral","126072051118260225"
+"apple","neutral","126071173640499200"
+"apple","neutral","126068964685135872"
+"apple","neutral","126066994008162305"
+"apple","neutral","126066452787773443"
+"apple","neutral","126065983138955265"
+"apple","neutral","126065529684369408"
+"apple","neutral","126061182720278528"
+"apple","neutral","126057389333020672"
+"apple","neutral","126056940060155904"
+"apple","neutral","126055880394420224"
+"apple","neutral","126054998080622593"
+"apple","neutral","126054725727698944"
+"apple","neutral","126054145617694720"
+"apple","neutral","126052649475915776"
+"apple","neutral","126051375422504961"
+"apple","neutral","126049560878526464"
+"apple","neutral","126044185815040000"
+"apple","neutral","126044055644807169"
+"apple","neutral","126043954641780736"
+"apple","neutral","126042506717704192"
+"apple","neutral","126041773356232704"
+"apple","neutral","126041570876203009"
+"apple","neutral","126040050441011200"
+"apple","neutral","126039521853845504"
+"apple","neutral","126039090578735104"
+"apple","neutral","126031463647944704"
+"apple","neutral","126030091892432896"
+"apple","neutral","126029114850295809"
+"apple","neutral","126022527578406912"
+"apple","neutral","126021436149211136"
+"apple","neutral","126019432194650113"
+"apple","neutral","126018538531061760"
+"apple","neutral","126017685246050304"
+"apple","neutral","126017643747606528"
+"apple","neutral","126016494701256704"
+"apple","neutral","126016405085757440"
+"apple","neutral","126014643826208768"
+"apple","neutral","126014277814468608"
+"apple","neutral","126014214467895297"
+"apple","neutral","126014102379302912"
+"apple","neutral","126013626426466304"
+"apple","neutral","126012833128390656"
+"apple","neutral","126012515019784192"
+"apple","neutral","126012034545496065"
+"apple","neutral","126012004312956928"
+"apple","neutral","126011120694726656"
+"apple","neutral","126010471202566144"
+"apple","neutral","126009386022879232"
+"apple","neutral","126008776322064384"
+"apple","neutral","126007705600135168"
+"apple","neutral","126006669535744000"
+"apple","neutral","126006572420833282"
+"apple","neutral","126006349959135232"
+"apple","neutral","126004552557273088"
+"apple","neutral","126003746135224320"
+"apple","neutral","126003567315255296"
+"apple","neutral","126001989309054976"
+"apple","neutral","126001758853009409"
+"apple","neutral","126000843798491136"
+"apple","neutral","125999022908510209"
+"apple","neutral","125998732046123009"
+"apple","neutral","125996412252078080"
+"apple","neutral","125996330500890624"
+"apple","neutral","125994997609803776"
+"apple","neutral","125994518989385729"
+"apple","neutral","125993702782025729"
+"apple","neutral","125993105722839040"
+"apple","neutral","125992838910586880"
+"apple","neutral","125992594395250688"
+"apple","neutral","125991449455104000"
+"apple","neutral","125990236743405568"
+"apple","neutral","125990217801940992"
+"apple","neutral","125989605634879488"
+"apple","neutral","125989196132388864"
+"apple","neutral","125989009091592192"
+"apple","neutral","125988775548559360"
+"apple","neutral","125988651426512899"
+"apple","neutral","125984350989860864"
+"apple","neutral","125983179877253120"
+"apple","neutral","125981074114359297"
+"apple","neutral","125980918220464128"
+"apple","neutral","125980676653723648"
+"apple","neutral","125980615664336896"
+"apple","neutral","125979228452818944"
+"apple","neutral","125978290367381504"
+"apple","neutral","125975779447291904"
+"apple","neutral","125974955983769603"
+"apple","neutral","125974497546338304"
+"apple","neutral","125972882240188416"
+"apple","neutral","125971256335024128"
+"apple","neutral","125969677997453312"
+"apple","neutral","125969128514260992"
+"apple","neutral","125967560171720704"
+"apple","neutral","125967126912712705"
+"apple","neutral","125965569659895808"
+"apple","neutral","125963773176582144"
+"apple","neutral","125962667541270528"
+"apple","neutral","125962608519036928"
+"apple","neutral","125961033348153345"
+"apple","neutral","125960438981734400"
+"apple","neutral","125958702455988225"
+"apple","neutral","125958525708021760"
+"apple","neutral","125958117086347264"
+"apple","neutral","125957972466737152"
+"apple","neutral","125957965109932032"
+"apple","neutral","125951303770845185"
+"apple","neutral","125950941349421057"
+"apple","neutral","125950505389273090"
+"apple","neutral","125950026181648385"
+"apple","neutral","125949784677810176"
+"apple","neutral","125948450620702720"
+"apple","neutral","125947460592996352"
+"apple","neutral","125944856504827904"
+"apple","neutral","125944293671182336"
+"apple","neutral","125943115449253888"
+"apple","neutral","125943078837161984"
+"apple","neutral","125943020767019008"
+"apple","neutral","125940398915977217"
+"apple","neutral","125940300987371521"
+"apple","neutral","125939862078619648"
+"apple","neutral","125939833775460352"
+"apple","neutral","125938918540574720"
+"apple","neutral","125938325151432706"
+"apple","neutral","125936985796919296"
+"apple","neutral","125936323273048065"
+"apple","neutral","125935636300570624"
+"apple","neutral","125935627056324609"
+"apple","neutral","125935503752171520"
+"apple","neutral","125935314878476289"
+"apple","neutral","125933630613766144"
+"apple","neutral","125932876721168384"
+"apple","neutral","125930342891790337"
+"apple","neutral","125930143066759169"
+"apple","neutral","125930002607906816"
+"apple","neutral","125927540249473024"
+"apple","neutral","125927536847880192"
+"apple","neutral","125927533614084097"
+"apple","neutral","125927530761953281"
+"apple","neutral","125927399010467840"
+"apple","neutral","125927174514540544"
+"apple","neutral","125927032185044992"
+"apple","neutral","125926624930693121"
+"apple","neutral","125922989844856833"
+"apple","neutral","125922500839342080"
+"apple","neutral","125922174648324096"
+"apple","neutral","125921393350160384"
+"apple","neutral","125920729194704896"
+"apple","neutral","125920725595983874"
+"apple","neutral","125920721200361472"
+"apple","neutral","125920717966544896"
+"apple","neutral","125920716297211904"
+"apple","neutral","125919221845721090"
+"apple","neutral","125918906215968771"
+"apple","neutral","125918450920062977"
+"apple","neutral","125918447979872258"
+"apple","neutral","125918444762828800"
+"apple","neutral","125918441013133312"
+"apple","neutral","125917264267579393"
+"apple","neutral","125917174618525696"
+"apple","neutral","125917170571026432"
+"apple","neutral","125917164535418880"
+"apple","neutral","125917160982855680"
+"apple","neutral","125915210337890304"
+"apple","neutral","125908946702696448"
+"apple","neutral","125907633466130432"
+"apple","neutral","125892140940267522"
+"apple","neutral","125891898517889024"
+"apple","neutral","125887065861787648"
+"apple","neutral","125882473312817152"
+"apple","neutral","125878880916611072"
+"apple","neutral","125866627337162752"
+"apple","neutral","125866368758333440"
+"apple","neutral","125859792802693120"
+"apple","neutral","125859488728236032"
+"apple","neutral","125854430171111424"
+"apple","neutral","125840474132066304"
+"apple","neutral","125830917578162176"
+"apple","neutral","125829040740368384"
+"apple","neutral","125828984293425152"
+"apple","neutral","125826820057731074"
+"apple","neutral","125824709421039616"
+"apple","neutral","125823389804929024"
+"apple","neutral","125821979797364736"
+"apple","neutral","125821218258550784"
+"apple","neutral","125817967240949760"
+"apple","neutral","125815990620659713"
+"apple","neutral","125811943054393344"
+"apple","neutral","125807897568481280"
+"apple","neutral","125804983185719297"
+"apple","neutral","125803571601080320"
+"apple","neutral","125803457155301376"
+"apple","neutral","125801811817922561"
+"apple","neutral","125793487479259136"
+"apple","neutral","125792596114161665"
+"apple","neutral","125792107930714113"
+"apple","neutral","125727869363163137"
+"apple","neutral","125727349034598401"
+"apple","neutral","125727044263874560"
+"apple","neutral","125726769297891330"
+"apple","neutral","125725274317914112"
+"apple","neutral","125725019178409984"
+"apple","neutral","125724524732882944"
+"apple","neutral","125721197437648896"
+"apple","neutral","125714971261812736"
+"apple","neutral","125708425752612864"
+"apple","neutral","125708240225959936"
+"apple","neutral","125706125764083712"
+"apple","neutral","125703536632807424"
+"apple","neutral","125698733768843264"
+"apple","neutral","125695449423286272"
+"apple","neutral","125695107734319104"
+"apple","neutral","125694587313467393"
+"apple","neutral","125692890474233856"
+"apple","neutral","125692845054115842"
+"apple","neutral","125691975474229248"
+"apple","neutral","125690764331196416"
+"apple","neutral","125689905954299904"
+"apple","neutral","125687710705926144"
+"apple","neutral","125686643960193024"
+"apple","neutral","125685016389894144"
+"apple","neutral","125679996420374530"
+"apple","neutral","125677838295764992"
+"apple","neutral","125674121722998785"
+"apple","neutral","125669834922008576"
+"apple","neutral","125667332931596290"
+"apple","neutral","125667159547461633"
+"apple","neutral","125666909080387584"
+"apple","neutral","125665930339565568"
+"apple","neutral","125664999036301312"
+"apple","neutral","125664891691474944"
+"apple","neutral","125663967296229376"
+"apple","neutral","125663914552868864"
+"apple","neutral","125661036891226113"
+"apple","neutral","125660067482697729"
+"apple","neutral","125657950185463808"
+"apple","neutral","125647236418912256"
+"apple","neutral","125645376790331392"
+"apple","neutral","125643523792969728"
+"apple","neutral","125642256114909184"
+"apple","neutral","125642161659199488"
+"apple","neutral","125642041140060160"
+"apple","neutral","125641351848136704"
+"apple","neutral","125640758966484992"
+"apple","neutral","125640679325052929"
+"apple","neutral","125640515021578240"
+"apple","neutral","125638955952640000"
+"apple","neutral","125633549847117824"
+"apple","neutral","125633468708302848"
+"apple","neutral","125633233982459904"
+"apple","neutral","125632687879884800"
+"apple","neutral","125632582795804672"
+"apple","neutral","125631556051140608"
+"apple","neutral","125630955154190336"
+"apple","neutral","125630836245676033"
+"apple","neutral","125630016485732352"
+"apple","neutral","125629788563050496"
+"apple","neutral","125627732032888833"
+"apple","neutral","125626286939979776"
+"apple","neutral","125625630254567424"
+"apple","neutral","125625566203346944"
+"apple","neutral","125620113582993408"
+"apple","neutral","125617451705712640"
+"apple","neutral","125617133123153921"
+"apple","neutral","125616747863736320"
+"apple","neutral","125614951787266048"
+"apple","neutral","125614910804738049"
+"apple","neutral","125607526967410689"
+"apple","neutral","125598495137726464"
+"apple","neutral","125596991290998784"
+"apple","neutral","125595669145722880"
+"apple","neutral","125595441562783744"
+"apple","neutral","125591434056318977"
+"apple","neutral","125589884445536257"
+"apple","neutral","125589258898644992"
+"apple","neutral","125588749454278656"
+"apple","neutral","125588587180863489"
+"apple","neutral","125587186723725312"
+"apple","neutral","125586682790674434"
+"apple","neutral","125585063327956992"
+"apple","neutral","125583717354831872"
+"apple","neutral","125581507355086848"
+"apple","neutral","125581280430669824"
+"apple","neutral","125564573167263746"
+"apple","neutral","125559846870323200"
+"apple","neutral","125559232157327360"
+"apple","neutral","125551370676862976"
+"apple","neutral","125546017205665792"
+"apple","neutral","125545914864640000"
+"apple","neutral","125544363945230336"
+"apple","neutral","125541112491425792"
+"apple","neutral","125538769632886784"
+"apple","neutral","125537487455137793"
+"apple","neutral","125532364406398977"
+"apple","neutral","125532202057482240"
+"apple","neutral","125528344480587776"
+"apple","neutral","125527718203887616"
+"apple","neutral","125512197135806464"
+"apple","neutral","125498684401135616"
+"apple","neutral","125496516000485376"
+"apple","neutral","125493419522002944"
+"apple","neutral","125493125098635265"
+"apple","neutral","125489264157917184"
+"apple","neutral","125486845768368128"
+"apple","neutral","125460019859820544"
+"apple","neutral","125459423870197760"
+"apple","neutral","125459375245635584"
+"apple","neutral","125457535951060993"
+"apple","neutral","125448837404954624"
+"apple","neutral","125433354488254464"
+"apple","neutral","125432626482917376"
+"apple","neutral","125432518324400128"
+"apple","neutral","125425087800291328"
+"apple","neutral","125424738662223872"
+"apple","neutral","125419216227667968"
+"apple","neutral","125417324621737985"
+"apple","neutral","125416866243018753"
+"apple","neutral","125416811490578435"
+"apple","neutral","125410153196560384"
+"apple","neutral","125409201907437569"
+"apple","neutral","125406528487424003"
+"apple","neutral","125406003863883776"
+"apple","neutral","125405939015757824"
+"apple","neutral","125402652610797569"
+"apple","neutral","125398913313284096"
+"apple","neutral","125398813543374848"
+"apple","neutral","125394805449699329"
+"apple","neutral","125379023307153408"
+"apple","neutral","125374041556127744"
+"apple","neutral","125373658389692416"
+"apple","neutral","125373427661029376"
+"apple","neutral","125372628746768384"
+"apple","neutral","125369026351349760"
+"apple","neutral","125368381728763904"
+"apple","neutral","125367492976717824"
+"apple","neutral","125366519680086016"
+"apple","neutral","125365963972542464"
+"apple","neutral","125365264656236544"
+"apple","neutral","125364154629492737"
+"apple","neutral","125364122853453824"
+"apple","neutral","125361209137565696"
+"apple","neutral","125360952878182400"
+"apple","neutral","125360877359742976"
+"apple","neutral","125359697770450944"
+"apple","neutral","125359622193295360"
+"apple","neutral","125357901580746752"
+"apple","neutral","125354296752619520"
+"apple","neutral","125351067620880385"
+"apple","neutral","125350537821569024"
+"apple","neutral","125347828754169856"
+"apple","neutral","125347618862792705"
+"apple","neutral","125345723020607488"
+"apple","neutral","125344722196766720"
+"apple","neutral","125344351218974720"
+"apple","neutral","125338231280644096"
+"apple","neutral","125336929825849344"
+"apple","neutral","125336798690942977"
+"apple","neutral","125335012961828866"
+"apple","neutral","125332871174037504"
+"apple","neutral","125332609428496384"
+"apple","neutral","125327281181835264"
+"apple","neutral","125326760769372160"
+"apple","neutral","125325397712846848"
+"apple","neutral","125324916009615360"
+"apple","neutral","125321084525490176"
+"apple","neutral","125318029390249984"
+"apple","neutral","125317541504626688"
+"apple","neutral","125317300860620801"
+"apple","neutral","125315460030922752"
+"apple","neutral","125315263183851521"
+"apple","neutral","125315080081518592"
+"apple","neutral","125312789345599489"
+"apple","neutral","125312357797863425"
+"apple","neutral","125309663913840640"
+"apple","neutral","125305567148388352"
+"apple","neutral","125300705836793856"
+"apple","neutral","125300603059576833"
+"apple","neutral","125287442407362560"
+"apple","neutral","125283873331494913"
+"apple","neutral","125281502866059264"
+"apple","neutral","125278676949544960"
+"apple","neutral","125276525472911360"
+"apple","neutral","125275795252977664"
+"apple","neutral","125275280678993920"
+"apple","neutral","125273317673414656"
+"apple","neutral","125270965268643840"
+"apple","neutral","125267017942052866"
+"apple","neutral","125266503657472000"
+"apple","neutral","125261029834899456"
+"apple","neutral","125260105154437121"
+"apple","neutral","125257803790159873"
+"apple","neutral","125252442836320256"
+"apple","neutral","125250721280040961"
+"apple","neutral","125250617911418881"
+"apple","neutral","125250078108684288"
+"apple","neutral","125246898830458880"
+"apple","neutral","125245892814045184"
+"apple","neutral","125244798671142912"
+"apple","neutral","125231250247135233"
+"apple","neutral","125227689895407616"
+"apple","neutral","125222749034659840"
+"apple","neutral","125218106778992640"
+"apple","neutral","125211793655218178"
+"apple","neutral","125206271560384512"
+"apple","neutral","125196751387889665"
+"apple","neutral","125193298624258049"
+"apple","neutral","125184976579862530"
+"apple","neutral","125085987431923713"
+"apple","irrelevant","126405660308021248"
+"apple","irrelevant","126403953058529280"
+"apple","irrelevant","126402391259103232"
+"apple","irrelevant","126399172495679488"
+"apple","irrelevant","126394266145665025"
+"apple","irrelevant","126391727408947200"
+"apple","irrelevant","126387209824776192"
+"apple","irrelevant","126385587740610563"
+"apple","irrelevant","126381904621600768"
+"apple","irrelevant","126379095004160001"
+"apple","irrelevant","126373281099026432"
+"apple","irrelevant","126367728754884609"
+"apple","irrelevant","126362562865528832"
+"apple","irrelevant","126360606042374144"
+"apple","irrelevant","126355839274594304"
+"apple","irrelevant","126355573586399232"
+"apple","irrelevant","126352049070809089"
+"apple","irrelevant","126346705292640257"
+"apple","irrelevant","126346563147673600"
+"apple","irrelevant","126346004688674816"
+"apple","irrelevant","126342441057001472"
+"apple","irrelevant","126340074777489408"
+"apple","irrelevant","126332817134190592"
+"apple","irrelevant","126331879883415552"
+"apple","irrelevant","126329876935479296"
+"apple","irrelevant","126319126913363968"
+"apple","irrelevant","126313259572793345"
+"apple","irrelevant","126312509983559681"
+"apple","irrelevant","126310736577298432"
+"apple","irrelevant","126307114959372289"
+"apple","irrelevant","126304243144597505"
+"apple","irrelevant","126303928039116800"
+"apple","irrelevant","126303310054559744"
+"apple","irrelevant","126298834203713536"
+"apple","irrelevant","126298134212120577"
+"apple","irrelevant","126297754799587328"
+"apple","irrelevant","126290039138291712"
+"apple","irrelevant","126271901340401665"
+"apple","irrelevant","126270990459219968"
+"apple","irrelevant","126264647652343808"
+"apple","irrelevant","126264035007143936"
+"apple","irrelevant","126263600548556800"
+"apple","irrelevant","126260304819662849"
+"apple","irrelevant","126252530819809280"
+"apple","irrelevant","126236984644612096"
+"apple","irrelevant","126232767821381632"
+"apple","irrelevant","126228762596618240"
+"apple","irrelevant","126215978341236736"
+"apple","irrelevant","126195701704163328"
+"apple","irrelevant","126186608113356800"
+"apple","irrelevant","126175729024122880"
+"apple","irrelevant","126173465253384193"
+"apple","irrelevant","126164921485492224"
+"apple","irrelevant","126149195957673984"
+"apple","irrelevant","126138637652992001"
+"apple","irrelevant","126131070050639874"
+"apple","irrelevant","126111632773480448"
+"apple","irrelevant","126097345124368385"
+"apple","irrelevant","126093841232166912"
+"apple","irrelevant","126061579233017856"
+"apple","irrelevant","126054622564589569"
+"apple","irrelevant","126054568273518592"
+"apple","irrelevant","126052637014630400"
+"apple","irrelevant","126042611709521921"
+"apple","irrelevant","126024290201124864"
+"apple","irrelevant","126022958710915072"
+"apple","irrelevant","126022708524888064"
+"apple","irrelevant","126007147199868928"
+"apple","irrelevant","126005063595466753"
+"apple","irrelevant","126001383869644800"
+"apple","irrelevant","126000885485678592"
+"apple","irrelevant","125998496535937024"
+"apple","irrelevant","125996653990772737"
+"apple","irrelevant","125993886249267200"
+"apple","irrelevant","125993438205321218"
+"apple","irrelevant","125993305325576193"
+"apple","irrelevant","125992545552576512"
+"apple","irrelevant","125991634855923712"
+"apple","irrelevant","125990804488601600"
+"apple","irrelevant","125990754769309696"
+"apple","irrelevant","125982640263274496"
+"apple","irrelevant","125973789526863872"
+"apple","irrelevant","125970459404673026"
+"apple","irrelevant","125968277083136000"
+"apple","irrelevant","125967413299773440"
+"apple","irrelevant","125967315488608257"
+"apple","irrelevant","125965369532878849"
+"apple","irrelevant","125965364667486209"
+"apple","irrelevant","125959699089719297"
+"apple","irrelevant","125959482588143616"
+"apple","irrelevant","125957826500771840"
+"apple","irrelevant","125957742698561537"
+"apple","irrelevant","125948329694724097"
+"apple","irrelevant","125930962545672192"
+"apple","irrelevant","125928640394432513"
+"apple","irrelevant","125910538550124545"
+"apple","irrelevant","125909170074562561"
+"apple","irrelevant","125877369796968448"
+"apple","irrelevant","125873952953352192"
+"apple","irrelevant","125862601677737985"
+"apple","irrelevant","125857117407166464"
+"apple","irrelevant","125827656238379008"
+"apple","irrelevant","125826633713201152"
+"apple","irrelevant","125825293473685505"
+"apple","irrelevant","125815316596002816"
+"apple","irrelevant","125797001337122817"
+"apple","irrelevant","125724424774221826"
+"apple","irrelevant","125707107495452673"
+"apple","irrelevant","125705666592641024"
+"apple","irrelevant","125705646942330880"
+"apple","irrelevant","125699684693065728"
+"apple","irrelevant","125695680135172096"
+"apple","irrelevant","125695094836826112"
+"apple","irrelevant","125653144993660928"
+"apple","irrelevant","125650076759638016"
+"apple","irrelevant","125647972087242754"
+"apple","irrelevant","125641150186000384"
+"apple","irrelevant","125622089502830592"
+"apple","irrelevant","125621628917915648"
+"apple","irrelevant","125602732278169601"
+"apple","irrelevant","125595437938905088"
+"apple","irrelevant","125590191502131200"
+"apple","irrelevant","125580342244548608"
+"apple","irrelevant","125568051277086721"
+"apple","irrelevant","125542968844226560"
+"apple","irrelevant","125542941287649280"
+"apple","irrelevant","125534067495141376"
+"apple","irrelevant","125526544377577472"
+"apple","irrelevant","125521498055254016"
+"apple","irrelevant","125521344342392832"
+"apple","irrelevant","125516954407677952"
+"apple","irrelevant","125495075504537600"
+"apple","irrelevant","125490577130258432"
+"apple","irrelevant","125485687339352064"
+"apple","irrelevant","125447473786392576"
+"apple","irrelevant","125445752083329025"
+"apple","irrelevant","125445407592546304"
+"apple","irrelevant","125421514928558080"
+"apple","irrelevant","125411589905068033"
+"apple","irrelevant","125409044222586880"
+"apple","irrelevant","125406418777018368"
+"apple","irrelevant","125405429583970305"
+"apple","irrelevant","125375333162684416"
+"apple","irrelevant","125361267555835905"
+"apple","irrelevant","125353260520443904"
+"apple","irrelevant","125352405482217473"
+"apple","irrelevant","125346522618535937"
+"apple","irrelevant","125339193802100736"
+"apple","irrelevant","125336882862231552"
+"apple","irrelevant","125333140414808065"
+"apple","irrelevant","125325092841467904"
+"apple","irrelevant","125321769203666944"
+"apple","irrelevant","125319263027343360"
+"apple","irrelevant","125305753903964161"
+"apple","irrelevant","125301831286013952"
+"apple","irrelevant","125287180561166336"
+"apple","irrelevant","125275735815491584"
+"apple","irrelevant","125232405517844481"
+"apple","irrelevant","125228207002759168"
+"apple","irrelevant","125219664488960000"
+"apple","irrelevant","125209676416679936"
+"apple","irrelevant","125206785584922624"
+"apple","irrelevant","125184213342367744"
+"apple","irrelevant","125082707389718529"
+"google","positive","126534770095169536"
+"google","positive","126534201880219648"
+"google","positive","126534020367519744"
+"google","positive","126533948925952000"
+"google","positive","126533885109600256"
+"google","positive","126533562781544448"
+"google","positive","126533349727666176"
+"google","positive","126533268119109632"
+"google","positive","126533166352699392"
+"google","positive","126532897715929088"
+"google","positive","126531552367751169"
+"google","positive","126531180907601920"
+"google","positive","126530945976238080"
+"google","positive","126530924576907264"
+"google","positive","126530807891374082"
+"google","positive","126530189579649024"
+"google","positive","126530027939569665"
+"google","positive","126529770778411008"
+"google","positive","126528982807089152"
+"google","positive","126528978239496194"
+"google","positive","126528804192653312"
+"google","positive","126528264117293056"
+"google","positive","126526219587039233"
+"google","positive","126525469897146368"
+"google","positive","126525368860540928"
+"google","positive","126525172969766912"
+"google","positive","126524301259194368"
+"google","positive","126523731710443521"
+"google","positive","126523530903953408"
+"google","positive","126523525598162944"
+"google","positive","126522990585315328"
+"google","positive","126522810821644288"
+"google","positive","126522714713370624"
+"google","positive","126522621251682304"
+"google","positive","126522262768726016"
+"google","positive","126521613259771904"
+"google","positive","126521382220738560"
+"google","positive","126521286053724160"
+"google","positive","126520518609350656"
+"google","positive","126520029410885632"
+"google","positive","126519483752914944"
+"google","positive","126519329025040384"
+"google","positive","126519123772588032"
+"google","positive","126519017405030400"
+"google","positive","126518882939838464"
+"google","positive","126516914678808578"
+"google","positive","126516779886456832"
+"google","positive","126516304336257025"
+"google","positive","126515760855134208"
+"google","positive","126514474378203136"
+"google","positive","126513620686352384"
+"google","positive","126513526385819648"
+"google","positive","126513425043030016"
+"google","positive","126512728297844736"
+"google","positive","126512631937904640"
+"google","positive","126512208451600384"
+"google","positive","126511837796765696"
+"google","positive","126511545160171520"
+"google","positive","126511426926944256"
+"google","positive","126511000907288576"
+"google","positive","126510977335300096"
+"google","positive","126510551789604864"
+"google","positive","126509929619132417"
+"google","positive","126509528287166464"
+"google","positive","126508433582211072"
+"google","positive","126508393203642368"
+"google","positive","126507456019968000"
+"google","positive","126507105023819776"
+"google","positive","126506850781888512"
+"google","positive","126506410195431424"
+"google","positive","126506064387637249"
+"google","positive","126505384428052481"
+"google","positive","126505187752943616"
+"google","positive","126505144878772224"
+"google","positive","126504782465732608"
+"google","positive","126504452680187905"
+"google","positive","126504346639802368"
+"google","positive","126504216004005888"
+"google","positive","126504013939216384"
+"google","positive","126503946092158976"
+"google","positive","126503805369069568"
+"google","positive","126503790412181504"
+"google","positive","126503029548654593"
+"google","positive","126502415322193920"
+"google","positive","126502014560649216"
+"google","positive","126501732443361280"
+"google","positive","126501468902658048"
+"google","positive","126500614552289282"
+"google","positive","126499965869625345"
+"google","positive","126499581520384001"
+"google","positive","126499428965158912"
+"google","positive","126499145014980608"
+"google","positive","126499143282737152"
+"google","positive","126498734409396224"
+"google","positive","126498608815149056"
+"google","positive","126498587499696128"
+"google","positive","126497976314109952"
+"google","positive","126497860752646146"
+"google","positive","126497655785402368"
+"google","positive","126496772586610688"
+"google","positive","126496739531304960"
+"google","positive","126496342901133313"
+"google","positive","126496262668292096"
+"google","positive","126496155856142336"
+"google","positive","126496005809127424"
+"google","positive","126495843116265475"
+"google","positive","126495812724338688"
+"google","positive","126495283176685569"
+"google","positive","126495221373607936"
+"google","positive","126494976396898305"
+"google","positive","126494883367235585"
+"google","positive","126494838689513473"
+"google","positive","126494834449063936"
+"google","positive","126494645281755136"
+"google","positive","126494573966016512"
+"google","positive","126494442290020352"
+"google","positive","126494344566943744"
+"google","positive","126494339248562176"
+"google","positive","126494280318582784"
+"google","positive","126494221879357440"
+"google","positive","126494100252925954"
+"google","positive","126494070385283072"
+"google","positive","126493889761787904"
+"google","positive","126493850914131968"
+"google","positive","126493722916560896"
+"google","positive","126493683515260928"
+"google","positive","126493648757071873"
+"google","positive","126493639605092352"
+"google","positive","126493624270725120"
+"google","positive","126493538945994752"
+"google","positive","126493495933403136"
+"google","positive","126493472931844096"
+"google","positive","126493405760069632"
+"google","positive","126493371790397440"
+"google","positive","126493365775777792"
+"google","positive","126493265661919232"
+"google","positive","126493192110612480"
+"google","positive","126493141271449600"
+"google","positive","126493101807239168"
+"google","positive","126493037110099968"
+"google","positive","126493034014720000"
+"google","positive","126492983968268289"
+"google","positive","126492981460082688"
+"google","positive","126492970793971712"
+"google","positive","126492852615262208"
+"google","positive","126492838870515713"
+"google","positive","126492656124702721"
+"google","positive","126492628152889344"
+"google","positive","126492595500236800"
+"google","positive","126492495394775043"
+"google","positive","126492415749128192"
+"google","positive","126492266016673794"
+"google","positive","126492259993657345"
+"google","positive","126492242373386240"
+"google","positive","126492101549621249"
+"google","positive","126492040954527744"
+"google","positive","126492009748889600"
+"google","positive","126491707247300608"
+"google","positive","126491689396338688"
+"google","positive","126491589852930048"
+"google","positive","126491556831182848"
+"google","positive","126491420260442112"
+"google","positive","126491304036278272"
+"google","positive","126491177754165252"
+"google","positive","126491109982613505"
+"google","positive","126491084854530049"
+"google","positive","126491009558392832"
+"google","positive","126490970803027969"
+"google","positive","126490696214511616"
+"google","positive","126490548306579457"
+"google","positive","126490282878443520"
+"google","positive","126490137944268800"
+"google","positive","126489924596793345"
+"google","positive","126489848004608000"
+"google","positive","126489841096597504"
+"google","positive","126489713782685696"
+"google","positive","126489671730597888"
+"google","positive","126489564415131649"
+"google","positive","126489424807735296"
+"google","positive","126489397507014656"
+"google","positive","126489393526616064"
+"google","positive","126489263025033216"
+"google","positive","126489200567664640"
+"google","positive","126489182263721984"
+"google","positive","126489085102661632"
+"google","positive","126489066077302784"
+"google","positive","126488933772169216"
+"google","positive","126488863374983168"
+"google","positive","126488589046517760"
+"google","positive","126488572772622336"
+"google","positive","126488519265894400"
+"google","positive","126488454921068544"
+"google","positive","126488447098695680"
+"google","positive","126488384410619906"
+"google","positive","126488304341360640"
+"google","positive","126488294325370880"
+"google","positive","126488282862338049"
+"google","positive","126488249739915264"
+"google","positive","126488202935664640"
+"google","positive","126488018616987648"
+"google","positive","126487901084196864"
+"google","positive","126487844830191617"
+"google","positive","126487738684948480"
+"google","positive","126487523366150144"
+"google","positive","126487508589621248"
+"google","positive","126487332865056768"
+"google","positive","126487323071365120"
+"google","positive","126487172487462912"
+"google","positive","126487153655029760"
+"google","positive","126487090723700737"
+"google","positive","126487082905518081"
+"google","positive","126486849706401792"
+"google","positive","126486332104130561"
+"google","positive","126486322675318784"
+"google","positive","126485171280166913"
+"google","positive","126484977176158208"
+"google","positive","126484769285480448"
+"google","positive","126484631389356032"
+"google","negative","126534476875567104"
+"google","negative","126533446293127168"
+"google","negative","126532210210783232"
+"google","negative","126527166195314688"
+"google","negative","126526648928579584"
+"google","negative","126524840126582784"
+"google","negative","126520550876127232"
+"google","negative","126520337289580544"
+"google","negative","126518920122335233"
+"google","negative","126517970179600385"
+"google","negative","126517608697708545"
+"google","negative","126516572343910400"
+"google","negative","126516408317251585"
+"google","negative","126515822750478337"
+"google","negative","126515697181409280"
+"google","negative","126514511862706176"
+"google","negative","126507644742672384"
+"google","negative","126506609823334400"
+"google","negative","126505594290057216"
+"google","negative","126505113362776064"
+"google","negative","126503098071007232"
+"google","negative","126502732369629184"
+"google","negative","126502274204831744"
+"google","negative","126501869920075776"
+"google","negative","126499555742203904"
+"google","negative","126497996350304257"
+"google","negative","126497929593761792"
+"google","negative","126497514168922112"
+"google","negative","126497333209858049"
+"google","negative","126496987192373248"
+"google","negative","126495219058348032"
+"google","negative","126494895501348864"
+"google","negative","126494402381225984"
+"google","negative","126494319749238784"
+"google","negative","126494303173361664"
+"google","negative","126494261922381824"
+"google","negative","126493910037037058"
+"google","negative","126493616519647232"
+"google","negative","126493030185316352"
+"google","negative","126492741764005890"
+"google","negative","126492244151771137"
+"google","negative","126491870900666368"
+"google","negative","126491825660887040"
+"google","negative","126491776482689024"
+"google","negative","126491509527805952"
+"google","negative","126491502493962240"
+"google","negative","126491480087986176"
+"google","negative","126490767345725441"
+"google","negative","126490750958567424"
+"google","negative","126490477288636416"
+"google","negative","126489936944832512"
+"google","negative","126489813468721152"
+"google","negative","126489698314104832"
+"google","negative","126488700975726593"
+"google","negative","126487669533442048"
+"google","negative","126487604035198976"
+"google","negative","126487206570373120"
+"google","negative","126487152338026496"
+"google","negative","126486838549557248"
+"google","negative","126486654830645249"
+"google","negative","126486634458914816"
+"google","neutral","126535080557551616"
+"google","neutral","126535043588964352"
+"google","neutral","126535016049160192"
+"google","neutral","126534917290070016"
+"google","neutral","126534362148782080"
+"google","neutral","126534144170790912"
+"google","neutral","126534083797987328"
+"google","neutral","126534080627093504"
+"google","neutral","126533944832311298"
+"google","neutral","126533740628422656"
+"google","neutral","126533730671149056"
+"google","neutral","126533567311392769"
+"google","neutral","126533547803680770"
+"google","neutral","126533491495157760"
+"google","neutral","126533487590248448"
+"google","neutral","126533485837037568"
+"google","neutral","126532820507176960"
+"google","neutral","126532798994583552"
+"google","neutral","126532569259970560"
+"google","neutral","126532361587396610"
+"google","neutral","126532316859338752"
+"google","neutral","126532178082410496"
+"google","neutral","126532141302558720"
+"google","neutral","126532121773883392"
+"google","neutral","126532064823619584"
+"google","neutral","126532054136524800"
+"google","neutral","126532019999096832"
+"google","neutral","126531893649874945"
+"google","neutral","126531814071336960"
+"google","neutral","126531693522857984"
+"google","neutral","126531628087513088"
+"google","neutral","126531561691693056"
+"google","neutral","126531560194326530"
+"google","neutral","126531556255870976"
+"google","neutral","126531542733430784"
+"google","neutral","126531524945395713"
+"google","neutral","126531522487533568"
+"google","neutral","126531297249202176"
+"google","neutral","126531285475799040"
+"google","neutral","126531278517448705"
+"google","neutral","126531161215344640"
+"google","neutral","126530795987939328"
+"google","neutral","126530766829129728"
+"google","neutral","126530553477468160"
+"google","neutral","126530551384506368"
+"google","neutral","126530550394650625"
+"google","neutral","126530532640169986"
+"google","neutral","126530487840813056"
+"google","neutral","126530423026233346"
+"google","neutral","126529904731881472"
+"google","neutral","126529833487446016"
+"google","neutral","126529492582797313"
+"google","neutral","126529490737303552"
+"google","neutral","126529470814363648"
+"google","neutral","126529468981452800"
+"google","neutral","126529403067969537"
+"google","neutral","126529287858819072"
+"google","neutral","126529224642281472"
+"google","neutral","126529168715431936"
+"google","neutral","126529111819681792"
+"google","neutral","126529099018674176"
+"google","neutral","126529019284946944"
+"google","neutral","126528767228248064"
+"google","neutral","126528551766867968"
+"google","neutral","126528448050118656"
+"google","neutral","126528342613700608"
+"google","neutral","126528318542589952"
+"google","neutral","126528316978102272"
+"google","neutral","126528307004051458"
+"google","neutral","126528290352660480"
+"google","neutral","126528288729473024"
+"google","neutral","126528287831891968"
+"google","neutral","126528286871396352"
+"google","neutral","126528248329940992"
+"google","neutral","126528157573578754"
+"google","neutral","126528078057963520"
+"google","neutral","126527897065373696"
+"google","neutral","126527760662413312"
+"google","neutral","126527757151776768"
+"google","neutral","126527756036091904"
+"google","neutral","126527743528673280"
+"google","neutral","126527523529043968"
+"google","neutral","126527262190346240"
+"google","neutral","126527122218041345"
+"google","neutral","126527077905207296"
+"google","neutral","126526945008689152"
+"google","neutral","126526850452299776"
+"google","neutral","126526838251065344"
+"google","neutral","126526793380409344"
+"google","neutral","126526782001262592"
+"google","neutral","126526594973052928"
+"google","neutral","126526457974489088"
+"google","neutral","126526371785752576"
+"google","neutral","126525872915226624"
+"google","neutral","126525730526998528"
+"google","neutral","126525539442892800"
+"google","neutral","126525506018492416"
+"google","neutral","126525491954987008"
+"google","neutral","126525475723022336"
+"google","neutral","126525465715425282"
+"google","neutral","126525210483638272"
+"google","neutral","126525169756946432"
+"google","neutral","126525031135195136"
+"google","neutral","126525007525462016"
+"google","neutral","126524889296412672"
+"google","neutral","126524843339431936"
+"google","neutral","126524841544257536"
+"google","neutral","126524784120037376"
+"google","neutral","126524764071264257"
+"google","neutral","126524648874717184"
+"google","neutral","126524290945384449"
+"google","neutral","126524242635403264"
+"google","neutral","126524229997953024"
+"google","neutral","126524074179567617"
+"google","neutral","126524050037145601"
+"google","neutral","126524003266473984"
+"google","neutral","126523791735144448"
+"google","neutral","126523776195231744"
+"google","neutral","126523549493112832"
+"google","neutral","126523519357030400"
+"google","neutral","126523435772948480"
+"google","neutral","126523421231300608"
+"google","neutral","126523245649342464"
+"google","neutral","126522997522702336"
+"google","neutral","126522979080339456"
+"google","neutral","126522978971299840"
+"google","neutral","126522853779705856"
+"google","neutral","126522802466598912"
+"google","neutral","126522667703599104"
+"google","neutral","126522664939565057"
+"google","neutral","126522423771267072"
+"google","neutral","126522225674301440"
+"google","neutral","126521973672120320"
+"google","neutral","126521718272573440"
+"google","neutral","126521582809128961"
+"google","neutral","126521573330010112"
+"google","neutral","126521564979150848"
+"google","neutral","126521489116766208"
+"google","neutral","126521384657616896"
+"google","neutral","126521228524658688"
+"google","neutral","126521130197585920"
+"google","neutral","126521102783619072"
+"google","neutral","126520984122572801"
+"google","neutral","126520920352358401"
+"google","neutral","126520914413236224"
+"google","neutral","126520707969581056"
+"google","neutral","126520695218913282"
+"google","neutral","126520611190218752"
+"google","neutral","126520531934654465"
+"google","neutral","126520511483219968"
+"google","neutral","126520508903718912"
+"google","neutral","126520504822661120"
+"google","neutral","126520501442056193"
+"google","neutral","126520500246687745"
+"google","neutral","126520116795015169"
+"google","neutral","126520107089395712"
+"google","neutral","126520074717765632"
+"google","neutral","126520020871294977"
+"google","neutral","126519912821825538"
+"google","neutral","126519469500669952"
+"google","neutral","126519331956862976"
+"google","neutral","126519306832982016"
+"google","neutral","126519290722648064"
+"google","neutral","126519251631747072"
+"google","neutral","126519229515169792"
+"google","neutral","126519227006988288"
+"google","neutral","126519072195223552"
+"google","neutral","126519044798029824"
+"google","neutral","126519034488426496"
+"google","neutral","126519019699314688"
+"google","neutral","126518955526455296"
+"google","neutral","126518773401391104"
+"google","neutral","126518709358575616"
+"google","neutral","126518669554626560"
+"google","neutral","126518471893848064"
+"google","neutral","126518469507289090"
+"google","neutral","126518466315431936"
+"google","neutral","126518461055774721"
+"google","neutral","126518313160409088"
+"google","neutral","126518280633593856"
+"google","neutral","126518252355584001"
+"google","neutral","126518222487961600"
+"google","neutral","126518218343972864"
+"google","neutral","126518146063532032"
+"google","neutral","126518121946292224"
+"google","neutral","126518025863168000"
+"google","neutral","126517966589267968"
+"google","neutral","126517947329036288"
+"google","neutral","126517930027515904"
+"google","neutral","126517793792344064"
+"google","neutral","126517583347326976"
+"google","neutral","126517558152134656"
+"google","neutral","126517526363504641"
+"google","neutral","126517510915899392"
+"google","neutral","126517487503294464"
+"google","neutral","126517179272278016"
+"google","neutral","126517076889321472"
+"google","neutral","126516982936895488"
+"google","neutral","126516936862474241"
+"google","neutral","126516898845310977"
+"google","neutral","126516776652640256"
+"google","neutral","126516749242871809"
+"google","neutral","126516744742387712"
+"google","neutral","126516481679818752"
+"google","neutral","126516342319890432"
+"google","neutral","126516212749443072"
+"google","neutral","126516054452211712"
+"google","neutral","126515992737218560"
+"google","neutral","126515637978796032"
+"google","neutral","126515624984846336"
+"google","neutral","126515576507084800"
+"google","neutral","126515415592607744"
+"google","neutral","126515341567344641"
+"google","neutral","126515215255871488"
+"google","neutral","126515104723374080"
+"google","neutral","126514999375048705"
+"google","neutral","126514945373376512"
+"google","neutral","126514719828885504"
+"google","neutral","126514719770161152"
+"google","neutral","126514718188900352"
+"google","neutral","126514697875894272"
+"google","neutral","126514441314500608"
+"google","neutral","126514438680489985"
+"google","neutral","126514435949993984"
+"google","neutral","126513983338450944"
+"google","neutral","126513869467299840"
+"google","neutral","126513499890397185"
+"google","neutral","126513430508212224"
+"google","neutral","126513429103128577"
+"google","neutral","126513426968215552"
+"google","neutral","126513317320736768"
+"google","neutral","126513312857985024"
+"google","neutral","126513307589935104"
+"google","neutral","126513117944496128"
+"google","neutral","126513115830554625"
+"google","neutral","126513110264717312"
+"google","neutral","126512968958607360"
+"google","neutral","126512932933734402"
+"google","neutral","126512842194161664"
+"google","neutral","126512471845515264"
+"google","neutral","126512460629946370"
+"google","neutral","126512277720535040"
+"google","neutral","126512269847826432"
+"google","neutral","126512265036959744"
+"google","neutral","126512261543116800"
+"google","neutral","126512199857475585"
+"google","neutral","126512130752126976"
+"google","neutral","126511907569020928"
+"google","neutral","126511799947366401"
+"google","neutral","126511624256372736"
+"google","neutral","126511578777526273"
+"google","neutral","126511507088478208"
+"google","neutral","126511425760935936"
+"google","neutral","126511400825790464"
+"google","neutral","126511380365983744"
+"google","neutral","126511358861778944"
+"google","neutral","126511243371610113"
+"google","neutral","126510976358035456"
+"google","neutral","126510915771301888"
+"google","neutral","126510815246422016"
+"google","neutral","126510801803673600"
+"google","neutral","126510781801046016"
+"google","neutral","126510731549085697"
+"google","neutral","126510402359140352"
+"google","neutral","126510393114898432"
+"google","neutral","126510377558216704"
+"google","neutral","126510284536942592"
+"google","neutral","126510143541231616"
+"google","neutral","126510049039368192"
+"google","neutral","126509936518762496"
+"google","neutral","126509922505596928"
+"google","neutral","126509783665745920"
+"google","neutral","126509619819462657"
+"google","neutral","126509495907135489"
+"google","neutral","126509474935611392"
+"google","neutral","126509381327134720"
+"google","neutral","126509283260108800"
+"google","neutral","126509226720903168"
+"google","neutral","126509135842914304"
+"google","neutral","126508885992415232"
+"google","neutral","126508842367463424"
+"google","neutral","126508789254979584"
+"google","neutral","126508753997668352"
+"google","neutral","126508642060083200"
+"google","neutral","126508495255257088"
+"google","neutral","126508398522019840"
+"google","neutral","126508044187217920"
+"google","neutral","126508037400825857"
+"google","neutral","126508035416928256"
+"google","neutral","126507982543532034"
+"google","neutral","126507925337411584"
+"google","neutral","126507911844339712"
+"google","neutral","126507657472393216"
+"google","neutral","126507418539671552"
+"google","neutral","126506926069645312"
+"google","neutral","126506902145347584"
+"google","neutral","126506895124086784"
+"google","neutral","126506846499520512"
+"google","neutral","126506795958153216"
+"google","neutral","126506701259157504"
+"google","neutral","126506651271430144"
+"google","neutral","126506310228377600"
+"google","neutral","126506306201858049"
+"google","neutral","126506273662447616"
+"google","neutral","126506272152502272"
+"google","neutral","126506232432439296"
+"google","neutral","126506073099218945"
+"google","neutral","126505781553147904"
+"google","neutral","126505768299147264"
+"google","neutral","126505670446022656"
+"google","neutral","126505612690456576"
+"google","neutral","126505607116238848"
+"google","neutral","126505469601775616"
+"google","neutral","126505424823402496"
+"google","neutral","126505412068511745"
+"google","neutral","126505392191705088"
+"google","neutral","126505364307984384"
+"google","neutral","126505359161573377"
+"google","neutral","126505133587709953"
+"google","neutral","126505125853401088"
+"google","neutral","126505094664568833"
+"google","neutral","126505086041067520"
+"google","neutral","126504998799552513"
+"google","neutral","126504923687960576"
+"google","neutral","126504910844989440"
+"google","neutral","126504893354741760"
+"google","neutral","126504842469457922"
+"google","neutral","126504774047772672"
+"google","neutral","126504709036056576"
+"google","neutral","126504479595044864"
+"google","neutral","126504427275300864"
+"google","neutral","126504377862205441"
+"google","neutral","126504319859175424"
+"google","neutral","126504126665334784"
+"google","neutral","126504115508494337"
+"google","neutral","126504105530236928"
+"google","neutral","126504077831045122"
+"google","neutral","126503993768804352"
+"google","neutral","126503947560165376"
+"google","neutral","126503755331022849"
+"google","neutral","126503706760974337"
+"google","neutral","126503703514578944"
+"google","neutral","126503693355982849"
+"google","neutral","126503688595447810"
+"google","neutral","126503683142852608"
+"google","neutral","126503627706732544"
+"google","neutral","126503364103118848"
+"google","neutral","126503349657935872"
+"google","neutral","126503348613558272"
+"google","neutral","126503090487705601"
+"google","neutral","126503088461848576"
+"google","neutral","126502770130960384"
+"google","neutral","126502761608122368"
+"google","neutral","126502730264088576"
+"google","neutral","126502630578069504"
+"google","neutral","126502626085969920"
+"google","neutral","126502616128684032"
+"google","neutral","126502614086070273"
+"google","neutral","126502326356815872"
+"google","neutral","126502296916987904"
+"google","neutral","126502291799945217"
+"google","neutral","126502250041454593"
+"google","neutral","126502128737976321"
+"google","neutral","126502101944778752"
+"google","neutral","126502100308996097"
+"google","neutral","126501988132327425"
+"google","neutral","126501888756682752"
+"google","neutral","126501882331017216"
+"google","neutral","126501647378690048"
+"google","neutral","126501582274707457"
+"google","neutral","126501535160074240"
+"google","neutral","126501463752060928"
+"google","neutral","126501463726899200"
+"google","neutral","126501463672369152"
+"google","neutral","126501463663976448"
+"google","neutral","126501463659790337"
+"google","neutral","126501463626223616"
+"google","neutral","126501463554924544"
+"google","neutral","126501463529754624"
+"google","neutral","126501463487815680"
+"google","neutral","126501428897382400"
+"google","neutral","126501392163684353"
+"google","neutral","126501360089825280"
+"google","neutral","126501209887621121"
+"google","neutral","126501176559677441"
+"google","neutral","126501155999203328"
+"google","neutral","126500867280093184"
+"google","neutral","126500856026771456"
+"google","neutral","126500659833995265"
+"google","neutral","126500532478148610"
+"google","neutral","126500276994711553"
+"google","neutral","126500259974234112"
+"google","neutral","126500105762250752"
+"google","neutral","126499963176886272"
+"google","neutral","126499722805522433"
+"google","neutral","126499712164560896"
+"google","neutral","126499577443532801"
+"google","neutral","126499521822867458"
+"google","neutral","126499521344712704"
+"google","neutral","126499346022805504"
+"google","neutral","126499314393546752"
+"google","neutral","126499307284217856"
+"google","neutral","126499293895987201"
+"google","neutral","126499217807122433"
+"google","neutral","126498840831475712"
+"google","neutral","126498825992019969"
+"google","neutral","126498759143211008"
+"google","neutral","126498660811935744"
+"google","neutral","126498593770184704"
+"google","neutral","126498481534808064"
+"google","neutral","126498259228303360"
+"google","neutral","126498218501607426"
+"google","neutral","126498101124005889"
+"google","neutral","126497969972330497"
+"google","neutral","126497822009860096"
+"google","neutral","126497642493648897"
+"google","neutral","126497423815213056"
+"google","neutral","126497346870718464"
+"google","neutral","126497339933327360"
+"google","neutral","126497339073495040"
+"google","neutral","126497335831306240"
+"google","neutral","126497288301457408"
+"google","neutral","126497256382803968"
+"google","neutral","126497160345829376"
+"google","neutral","126497100866387969"
+"google","neutral","126497008197435392"
+"google","neutral","126496951746301952"
+"google","neutral","126496930204352512"
+"google","neutral","126496891797118976"
+"google","neutral","126496853742198784"
+"google","neutral","126496835933179904"
+"google","neutral","126496835920609280"
+"google","neutral","126496802634612736"
+"google","neutral","126496726143086592"
+"google","neutral","126496688163655680"
+"google","neutral","126496634120056832"
+"google","neutral","126496571541045248"
+"google","neutral","126496553811709952"
+"google","neutral","126496553748803586"
+"google","neutral","126496493648609280"
+"google","neutral","126496395132796929"
+"google","neutral","126496303738912769"
+"google","neutral","126496230984519680"
+"google","neutral","126496228572790784"
+"google","neutral","126496131554344960"
+"google","neutral","126496131545960449"
+"google","neutral","126496131529183232"
+"google","neutral","126496131441102848"
+"google","neutral","126496131411742721"
+"google","neutral","126496131382382592"
+"google","neutral","126496131378188289"
+"google","neutral","126496131374006272"
+"google","neutral","126496131353018368"
+"google","neutral","126496068467822593"
+"google","neutral","126495741014315008"
+"google","neutral","126495706356789248"
+"google","neutral","126495690636529664"
+"google","neutral","126495653747634176"
+"google","neutral","126495340026273792"
+"google","neutral","126495319792959488"
+"google","neutral","126495268538560512"
+"google","neutral","126495169536208896"
+"google","neutral","126495125303083009"
+"google","neutral","126495072102522880"
+"google","neutral","126495001868906496"
+"google","neutral","126494909548077056"
+"google","neutral","126494729474015232"
+"google","neutral","126494715024637952"
+"google","neutral","126494709605609472"
+"google","neutral","126494691016441857"
+"google","neutral","126494645403398145"
+"google","neutral","126494569184505856"
+"google","neutral","126494532857643008"
+"google","neutral","126494358508806144"
+"google","neutral","126494314057568256"
+"google","neutral","126494270650724353"
+"google","neutral","126494239143116801"
+"google","neutral","126494171065364480"
+"google","neutral","126494094276042752"
+"google","neutral","126493962499391488"
+"google","neutral","126493954429550593"
+"google","neutral","126493895625424896"
+"google","neutral","126493811638677504"
+"google","neutral","126493785533321216"
+"google","neutral","126493625285750784"
+"google","neutral","126493567182061569"
+"google","neutral","126493558365630465"
+"google","neutral","126493553055633408"
+"google","neutral","126493550618742784"
+"google","neutral","126493370028797953"
+"google","neutral","126493352370765824"
+"google","neutral","126493335518068736"
+"google","neutral","126493327540502528"
+"google","neutral","126493322268250114"
+"google","neutral","126493312650719232"
+"google","neutral","126493280832724992"
+"google","neutral","126493225811841024"
+"google","neutral","126493189254291457"
+"google","neutral","126493174477758464"
+"google","neutral","126493155855052800"
+"google","neutral","126493015907897344"
+"google","neutral","126492985834733568"
+"google","neutral","126492945057718272"
+"google","neutral","126492886517809152"
+"google","neutral","126492864174755841"
+"google","neutral","126492839998783488"
+"google","neutral","126492795027456000"
+"google","neutral","126492775930798080"
+"google","neutral","126492775842713600"
+"google","neutral","126492759262633984"
+"google","neutral","126492737766834177"
+"google","neutral","126492735598366720"
+"google","neutral","126492726601584640"
+"google","neutral","126492719987179520"
+"google","neutral","126492692984238080"
+"google","neutral","126492604799004672"
+"google","neutral","126492471759867904"
+"google","neutral","126492440914956290"
+"google","neutral","126492364138221569"
+"google","neutral","126492325219278848"
+"google","neutral","126492310866362368"
+"google","neutral","126492274766004226"
+"google","neutral","126492105026703360"
+"google","neutral","126492088270454785"
+"google","neutral","126492059308785664"
+"google","neutral","126491876860755969"
+"google","neutral","126491752260575232"
+"google","neutral","126491726276853760"
+"google","neutral","126491658341715969"
+"google","neutral","126491610199490560"
+"google","neutral","126491517924810752"
+"google","neutral","126491513038442496"
+"google","neutral","126491459078729728"
+"google","neutral","126491267411619840"
+"google","neutral","126491255436881920"
+"google","neutral","126491202810953728"
+"google","neutral","126491025643552768"
+"google","neutral","126490806315008000"
+"google","neutral","126490801982275584"
+"google","neutral","126490797225934850"
+"google","neutral","126490763151421440"
+"google","neutral","126490760114733056"
+"google","neutral","126490712299675649"
+"google","neutral","126490709921497088"
+"google","neutral","126490662416826369"
+"google","neutral","126490566837026817"
+"google","neutral","126490278143082496"
+"google","neutral","126490265463701504"
+"google","neutral","126490235877081088"
+"google","neutral","126490166897541120"
+"google","neutral","126490158815125504"
+"google","neutral","126490134869852161"
+"google","neutral","126490118050684928"
+"google","neutral","126490034865045504"
+"google","neutral","126490011901231104"
+"google","neutral","126489950614073345"
+"google","neutral","126489908998176769"
+"google","neutral","126489751325908992"
+"google","neutral","126489719012990976"
+"google","neutral","126489665116192768"
+"google","neutral","126489609889783808"
+"google","neutral","126489523776536576"
+"google","neutral","126489506672160768"
+"google","neutral","126489489328705536"
+"google","neutral","126489300828307456"
+"google","neutral","126489263490596864"
+"google","neutral","126489146029129729"
+"google","neutral","126489064319881216"
+"google","neutral","126489048717074432"
+"google","neutral","126488983164289026"
+"google","neutral","126488935026266112"
+"google","neutral","126488912037289984"
+"google","neutral","126488727315943425"
+"google","neutral","126488649905864705"
+"google","neutral","126488582218203136"
+"google","neutral","126488561888399360"
+"google","neutral","126488352135450625"
+"google","neutral","126487924043821057"
+"google","neutral","126487912433975297"
+"google","neutral","126487846038147073"
+"google","neutral","126487807156944899"
+"google","neutral","126487788433584129"
+"google","neutral","126487744787660800"
+"google","neutral","126487541569433600"
+"google","neutral","126487465203736576"
+"google","neutral","126487372039847937"
+"google","neutral","126487167823388673"
+"google","neutral","126487043462266880"
+"google","neutral","126486964408033280"
+"google","neutral","126486941364527104"
+"google","neutral","126486926827065344"
+"google","neutral","126486798640754688"
+"google","neutral","126486616343724032"
+"google","neutral","126486384902017024"
+"google","neutral","126486348713570304"
+"google","neutral","126486111689256960"
+"google","neutral","126486051530354689"
+"google","neutral","126485712836112384"
+"google","neutral","126485684113522689"
+"google","neutral","126485085942845440"
+"google","neutral","126484200269426688"
+"google","neutral","126484162302586880"
+"google","neutral","126484021369778177"
+"google","neutral","126484018211454976"
+"google","irrelevant","126535062148759552"
+"google","irrelevant","126534927637417984"
+"google","irrelevant","126534908670783489"
+"google","irrelevant","126534871299538944"
+"google","irrelevant","126534769105305600"
+"google","irrelevant","126534678156029953"
+"google","irrelevant","126534649995464704"
+"google","irrelevant","126534648800096256"
+"google","irrelevant","126534648611340288"
+"google","irrelevant","126534647264972800"
+"google","irrelevant","126534525089091584"
+"google","irrelevant","126534223950651392"
+"google","irrelevant","126534127435530240"
+"google","irrelevant","126534054739836929"
+"google","irrelevant","126534037929074688"
+"google","irrelevant","126533966156148736"
+"google","irrelevant","126533775411781632"
+"google","irrelevant","126533688321253376"
+"google","irrelevant","126533686282825728"
+"google","irrelevant","126533684252774401"
+"google","irrelevant","126533682273071105"
+"google","irrelevant","126533680301752320"
+"google","irrelevant","126533636576129024"
+"google","irrelevant","126533618997792768"
+"google","irrelevant","126533452618137600"
+"google","irrelevant","126533364789424129"
+"google","irrelevant","126533325702701056"
+"google","irrelevant","126533318928896001"
+"google","irrelevant","126533134819921920"
+"google","irrelevant","126533119573630976"
+"google","irrelevant","126533008760111104"
+"google","irrelevant","126532931236794369"
+"google","irrelevant","126532704287199232"
+"google","irrelevant","126532561315958784"
+"google","irrelevant","126532543158820864"
+"google","irrelevant","126532472258301952"
+"google","irrelevant","126532467954950144"
+"google","irrelevant","126532295665532928"
+"google","irrelevant","126532294122024960"
+"google","irrelevant","126532119278264320"
+"google","irrelevant","126532046360289280"
+"google","irrelevant","126531989846241280"
+"google","irrelevant","126531827073679360"
+"google","irrelevant","126531797419950080"
+"google","irrelevant","126531615181651968"
+"google","irrelevant","126531519824142337"
+"google","irrelevant","126531381152059392"
+"google","irrelevant","126531281650589696"
+"google","irrelevant","126531280253882368"
+"google","irrelevant","126531278685216768"
+"google","irrelevant","126531098279804928"
+"google","irrelevant","126530329820397568"
+"google","irrelevant","126530251684720641"
+"google","irrelevant","126530242612432898"
+"google","irrelevant","126530163029704705"
+"google","irrelevant","126530054023946240"
+"google","irrelevant","126530000303292416"
+"google","irrelevant","126529908850700289"
+"google","irrelevant","126529643829399553"
+"google","irrelevant","126529610614718464"
+"google","irrelevant","126529609771659264"
+"google","irrelevant","126529491819433985"
+"google","irrelevant","126529490582118400"
+"google","irrelevant","126529267503861760"
+"google","irrelevant","126529265142472704"
+"google","irrelevant","126529171773067265"
+"google","irrelevant","126529003879272448"
+"google","irrelevant","126529001866002432"
+"google","irrelevant","126528999554949120"
+"google","irrelevant","126528997763989504"
+"google","irrelevant","126528993187999744"
+"google","irrelevant","126528938326495232"
+"google","irrelevant","126528826279866369"
+"google","irrelevant","126528658834853888"
+"google","irrelevant","126528444476555264"
+"google","irrelevant","126528018469494784"
+"google","irrelevant","126527955475251202"
+"google","irrelevant","126527760746295296"
+"google","irrelevant","126527053133656066"
+"google","irrelevant","126527051292356609"
+"google","irrelevant","126526946946457601"
+"google","irrelevant","126526850477469696"
+"google","irrelevant","126526815010426880"
+"google","irrelevant","126526781019787264"
+"google","irrelevant","126526765995802624"
+"google","irrelevant","126526602686369792"
+"google","irrelevant","126526465280970752"
+"google","irrelevant","126526113131413504"
+"google","irrelevant","126526082953379840"
+"google","irrelevant","126526068835368961"
+"google","irrelevant","126526020923834368"
+"google","irrelevant","126526019602628608"
+"google","irrelevant","126526019208351744"
+"google","irrelevant","126526017660653568"
+"google","irrelevant","126525994348711937"
+"google","irrelevant","126525853738860544"
+"google","irrelevant","126525817713991680"
+"google","irrelevant","126525815084158976"
+"google","irrelevant","126525130426957824"
+"google","irrelevant","126524896665796608"
+"google","irrelevant","126524657875697664"
+"google","irrelevant","126524361065758720"
+"google","irrelevant","126524217889001472"
+"google","irrelevant","126524216412618752"
+"google","irrelevant","126524213401096192"
+"google","irrelevant","126523942314840064"
+"google","irrelevant","126523941035577344"
+"google","irrelevant","126523916817674240"
+"google","irrelevant","126523574470189057"
+"google","irrelevant","126523560096313344"
+"google","irrelevant","126523556958961664"
+"google","irrelevant","126523548914290688"
+"google","irrelevant","126523356773232641"
+"google","irrelevant","126523270420905984"
+"google","irrelevant","126523229400600576"
+"google","irrelevant","126523147091578880"
+"google","irrelevant","126523008524365825"
+"google","irrelevant","126522989633212417"
+"google","irrelevant","126522783650955264"
+"google","irrelevant","126522732195233792"
+"google","irrelevant","126522730001600512"
+"google","irrelevant","126522728101584897"
+"google","irrelevant","126522726239313920"
+"google","irrelevant","126522723441721345"
+"google","irrelevant","126522646149087233"
+"google","irrelevant","126522540834304000"
+"google","irrelevant","126522492700471297"
+"google","irrelevant","126522478280450048"
+"google","irrelevant","126522273963319298"
+"google","irrelevant","126522127775047680"
+"google","irrelevant","126521734076698625"
+"google","irrelevant","126521694583132160"
+"google","irrelevant","126521635464425472"
+"google","irrelevant","126521523220652032"
+"google","irrelevant","126521505097068544"
+"google","irrelevant","126521233603960832"
+"google","irrelevant","126520774025678848"
+"google","irrelevant","126520080543649792"
+"google","irrelevant","126519943234732032"
+"google","irrelevant","126519837173358592"
+"google","irrelevant","126519715085549568"
+"google","irrelevant","126519472172445696"
+"google","irrelevant","126519390245109760"
+"google","irrelevant","126519359714766848"
+"google","irrelevant","126518917635125248"
+"google","irrelevant","126518845983830016"
+"google","irrelevant","126518328373153792"
+"google","irrelevant","126517747894075392"
+"google","irrelevant","126517575336214529"
+"google","irrelevant","126517570806358018"
+"google","irrelevant","126517567694180352"
+"google","irrelevant","126517492049915904"
+"google","irrelevant","126517413788401664"
+"google","irrelevant","126517183139430400"
+"google","irrelevant","126516892029558784"
+"google","irrelevant","126516806360899584"
+"google","irrelevant","126516804108562433"
+"google","irrelevant","126516802011402240"
+"google","irrelevant","126516602316406784"
+"google","irrelevant","126516523408949248"
+"google","irrelevant","126516376566366208"
+"google","irrelevant","126516048961863681"
+"google","irrelevant","126516048626335744"
+"google","irrelevant","126515979059609603"
+"google","irrelevant","126515953516298241"
+"google","irrelevant","126515696497725440"
+"google","irrelevant","126515695352688640"
+"google","irrelevant","126515693993738240"
+"google","irrelevant","126515640889655296"
+"google","irrelevant","126515639362920449"
+"google","irrelevant","126515633314729984"
+"google","irrelevant","126515631980937216"
+"google","irrelevant","126515608887115776"
+"google","irrelevant","126515498467860480"
+"google","irrelevant","126515353110065152"
+"google","irrelevant","126515159995912193"
+"google","irrelevant","126515137212456960"
+"google","irrelevant","126514888951595008"
+"google","irrelevant","126514866059100160"
+"google","irrelevant","126514704347693056"
+"google","irrelevant","126514555743518720"
+"google","irrelevant","126514131544178688"
+"google","irrelevant","126513979706183682"
+"google","irrelevant","126513961553244160"
+"google","irrelevant","126513949163257856"
+"google","irrelevant","126513706912841729"
+"google","irrelevant","126513429409312768"
+"google","irrelevant","126513410128089088"
+"google","irrelevant","126513333191979008"
+"google","irrelevant","126512924385738752"
+"google","irrelevant","126512627110252544"
+"google","irrelevant","126512625713545216"
+"google","irrelevant","126512053660827648"
+"google","irrelevant","126511257170886656"
+"google","irrelevant","126510558764736513"
+"google","irrelevant","126510399884496897"
+"google","irrelevant","126509931657564160"
+"google","irrelevant","126509472834269184"
+"google","irrelevant","126509298279907328"
+"google","irrelevant","126509273994891264"
+"google","irrelevant","126509203148902400"
+"google","irrelevant","126508947980029952"
+"google","irrelevant","126508734884220929"
+"google","irrelevant","126508667007803392"
+"google","irrelevant","126508103368835072"
+"google","irrelevant","126507946707398656"
+"google","irrelevant","126507876469583872"
+"google","irrelevant","126507802251370497"
+"google","irrelevant","126507794777128964"
+"google","irrelevant","126507720546328576"
+"google","irrelevant","126507456649101314"
+"google","irrelevant","126507441985826816"
+"google","irrelevant","126507086522748929"
+"google","irrelevant","126507016368816129"
+"google","irrelevant","126506951063511041"
+"google","irrelevant","126506683131379713"
+"google","irrelevant","126506375886016513"
+"google","irrelevant","126506159938088961"
+"google","irrelevant","126506135082631168"
+"google","irrelevant","126505911584948225"
+"google","irrelevant","126505668181098496"
+"google","irrelevant","126505497888161792"
+"google","irrelevant","126505070098518016"
+"google","irrelevant","126504896307539968"
+"google","irrelevant","126504863369670657"
+"google","irrelevant","126504793039572992"
+"google","irrelevant","126504734864576512"
+"google","irrelevant","126504701142376448"
+"google","irrelevant","126504580145094656"
+"google","irrelevant","126504516479754241"
+"google","irrelevant","126504512960724992"
+"google","irrelevant","126504398925996032"
+"google","irrelevant","126504335734607873"
+"google","irrelevant","126504297763581952"
+"google","irrelevant","126504285436514304"
+"google","irrelevant","126504090053259265"
+"google","irrelevant","126504049393672192"
+"google","irrelevant","126503517149069313"
+"google","irrelevant","126503460836343810"
+"google","irrelevant","126503277117452288"
+"google","irrelevant","126502927337660416"
+"google","irrelevant","126502611657564160"
+"google","irrelevant","126502546486464512"
+"google","irrelevant","126502150472871937"
+"google","irrelevant","126502129060950016"
+"google","irrelevant","126501952317165568"
+"google","irrelevant","126501402397782016"
+"google","irrelevant","126501224148242432"
+"google","irrelevant","126501185321566208"
+"google","irrelevant","126501161502126080"
+"google","irrelevant","126501085794930688"
+"google","irrelevant","126500586681143296"
+"google","irrelevant","126500532671086592"
+"google","irrelevant","126500515872915457"
+"google","irrelevant","126500195570683905"
+"google","irrelevant","126500127354535937"
+"google","irrelevant","126500080659341315"
+"google","irrelevant","126500021863579648"
+"google","irrelevant","126500018000633858"
+"google","irrelevant","126499879638925312"
+"google","irrelevant","126499873821437952"
+"google","irrelevant","126499835481292801"
+"google","irrelevant","126499741176561664"
+"google","irrelevant","126499498133426176"
+"google","irrelevant","126499369905160192"
+"google","irrelevant","126499359528468480"
+"google","irrelevant","126499266851119104"
+"google","irrelevant","126499256503762944"
+"google","irrelevant","126499194247725056"
+"google","irrelevant","126499065423859712"
+"google","irrelevant","126498881142927360"
+"google","irrelevant","126498880048214016"
+"google","irrelevant","126498875405115392"
+"google","irrelevant","126498743993376768"
+"google","irrelevant","126498725471334401"
+"google","irrelevant","126498584110710784"
+"google","irrelevant","126498451289673728"
+"google","irrelevant","126498295219621888"
+"google","irrelevant","126498266799013888"
+"google","irrelevant","126497733262585856"
+"google","irrelevant","126497725633150977"
+"google","irrelevant","126497618258964480"
+"google","irrelevant","126497611296407552"
+"google","irrelevant","126497446955188224"
+"google","irrelevant","126497409340686336"
+"google","irrelevant","126497063742603264"
+"google","irrelevant","126497027273129984"
+"google","irrelevant","126496716789792768"
+"google","irrelevant","126496706257895424"
+"google","irrelevant","126496681717014528"
+"google","irrelevant","126496237879959553"
+"google","irrelevant","126496216052801536"
+"google","irrelevant","126496203822211072"
+"google","irrelevant","126496202693939200"
+"google","irrelevant","126496202509389824"
+"google","irrelevant","126496200668098561"
+"google","irrelevant","126495762568851456"
+"google","irrelevant","126495739663757313"
+"google","irrelevant","126495620025417729"
+"google","irrelevant","126495448436453377"
+"google","irrelevant","126495346569392129"
+"google","irrelevant","126495269201264640"
+"google","irrelevant","126495253535531008"
+"google","irrelevant","126495253321613312"
+"google","irrelevant","126495251757150208"
+"google","irrelevant","126495232417218560"
+"google","irrelevant","126495208505479168"
+"google","irrelevant","126495101131309056"
+"google","irrelevant","126495097180262400"
+"google","irrelevant","126495028548874241"
+"google","irrelevant","126495020156063744"
+"google","irrelevant","126494978561146881"
+"google","irrelevant","126494884726190080"
+"google","irrelevant","126494808847040513"
+"google","irrelevant","126494774428565504"
+"google","irrelevant","126494752517537792"
+"google","irrelevant","126494681617010689"
+"google","irrelevant","126494434887090177"
+"google","irrelevant","126494286316445696"
+"google","irrelevant","126494260269821952"
+"google","irrelevant","126494247082934272"
+"google","irrelevant","126494176551514112"
+"google","irrelevant","126494166145437696"
+"google","irrelevant","126494156368523267"
+"google","irrelevant","126494152375533568"
+"google","irrelevant","126494104187183104"
+"google","irrelevant","126494033882259458"
+"google","irrelevant","126493930794659840"
+"google","irrelevant","126493860804308992"
+"google","irrelevant","126493833608441856"
+"google","irrelevant","126493751215525889"
+"google","irrelevant","126493741354713088"
+"google","irrelevant","126493715933052928"
+"google","irrelevant","126493543249358848"
+"google","irrelevant","126493540053303296"
+"google","irrelevant","126493537540907008"
+"google","irrelevant","126493534336458752"
+"google","irrelevant","126493525578747905"
+"google","irrelevant","126493517622149121"
+"google","irrelevant","126493505047629824"
+"google","irrelevant","126493282355261440"
+"google","irrelevant","126493154219266048"
+"google","irrelevant","126493144824020993"
+"google","irrelevant","126493116554424320"
+"google","irrelevant","126493078717603840"
+"google","irrelevant","126493008244912128"
+"google","irrelevant","126492972735922177"
+"google","irrelevant","126492948740313088"
+"google","irrelevant","126492905476067328"
+"google","irrelevant","126492820130373632"
+"google","irrelevant","126492770348171264"
+"google","irrelevant","126492723673960448"
+"google","irrelevant","126492704707321856"
+"google","irrelevant","126492543146926080"
+"google","irrelevant","126492542610051072"
+"google","irrelevant","126492533860728832"
+"google","irrelevant","126492487111028736"
+"google","irrelevant","126492457276940288"
+"google","irrelevant","126492452990369792"
+"google","irrelevant","126492366776446976"
+"google","irrelevant","126492339559608320"
+"google","irrelevant","126492333519802368"
+"google","irrelevant","126492258504683520"
+"google","irrelevant","126492256868900865"
+"google","irrelevant","126492248547405825"
+"google","irrelevant","126492186060656640"
+"google","irrelevant","126492179525931009"
+"google","irrelevant","126492130154774528"
+"google","irrelevant","126492053248020481"
+"google","irrelevant","126492052371406848"
+"google","irrelevant","126492019009916928"
+"google","irrelevant","126492011896373249"
+"google","irrelevant","126492010864574464"
+"google","irrelevant","126491986927685632"
+"google","irrelevant","126491961271136256"
+"google","irrelevant","126491942077992961"
+"google","irrelevant","126491928320688128"
+"google","irrelevant","126491895512838144"
+"google","irrelevant","126491807205965825"
+"google","irrelevant","126491801723994114"
+"google","irrelevant","126491743301537792"
+"google","irrelevant","126491727673569280"
+"google","irrelevant","126491723353427968"
+"google","irrelevant","126491662879965184"
+"google","irrelevant","126491659008610304"
+"google","irrelevant","126491616641953792"
+"google","irrelevant","126491544558632960"
+"google","irrelevant","126491450035814400"
+"google","irrelevant","126491409871155200"
+"google","irrelevant","126491356481859585"
+"google","irrelevant","126491323774672896"
+"google","irrelevant","126491290627080194"
+"google","irrelevant","126491272088260609"
+"google","irrelevant","126491237720141825"
+"google","irrelevant","126491078797950976"
+"google","irrelevant","126491075870343168"
+"google","irrelevant","126490998816768001"
+"google","irrelevant","126490976565985281"
+"google","irrelevant","126490918885920768"
+"google","irrelevant","126490858735407104"
+"google","irrelevant","126490790150144000"
+"google","irrelevant","126490759766618112"
+"google","irrelevant","126490644616200192"
+"google","irrelevant","126490589125550080"
+"google","irrelevant","126490558230302721"
+"google","irrelevant","126490549367738368"
+"google","irrelevant","126490540794576896"
+"google","irrelevant","126490516186595328"
+"google","irrelevant","126490479859736576"
+"google","irrelevant","126490448436015104"
+"google","irrelevant","126490435836325888"
+"google","irrelevant","126490420808126464"
+"google","irrelevant","126490021493616640"
+"google","irrelevant","126489969677176832"
+"google","irrelevant","126489957538873344"
+"google","irrelevant","126489935313256449"
+"google","irrelevant","126489934088511490"
+"google","irrelevant","126489915042168833"
+"google","irrelevant","126489907349831680"
+"google","irrelevant","126489892678144000"
+"google","irrelevant","126489830677942272"
+"google","irrelevant","126489823103041537"
+"google","irrelevant","126489808913694720"
+"google","irrelevant","126489703418568705"
+"google","irrelevant","126489580835848193"
+"google","irrelevant","126489542436995072"
+"google","irrelevant","126489540906070018"
+"google","irrelevant","126489462648745984"
+"google","irrelevant","126489460551585792"
+"google","irrelevant","126489430830743552"
+"google","irrelevant","126489415248920576"
+"google","irrelevant","126489326786850816"
+"google","irrelevant","126489254535774208"
+"google","irrelevant","126489247581609986"
+"google","irrelevant","126489192866910208"
+"google","irrelevant","126489107143737344"
+"google","irrelevant","126489099858214914"
+"google","irrelevant","126489084003762176"
+"google","irrelevant","126489023928741890"
+"google","irrelevant","126489012423770113"
+"google","irrelevant","126488974595334144"
+"google","irrelevant","126488887026655232"
+"google","irrelevant","126488858815774721"
+"google","irrelevant","126488807922085889"
+"google","irrelevant","126488559795453954"
+"google","irrelevant","126488553352994817"
+"google","irrelevant","126488503428190208"
+"google","irrelevant","126488427779727360"
+"google","irrelevant","126488387531194369"
+"google","irrelevant","126488315041030144"
+"google","irrelevant","126488313187143681"
+"google","irrelevant","126488305859698688"
+"google","irrelevant","126488293717192704"
+"google","irrelevant","126488289988452352"
+"google","irrelevant","126488289157976064"
+"google","irrelevant","126488234359406593"
+"google","irrelevant","126488066977308673"
+"google","irrelevant","126488061369532417"
+"google","irrelevant","126488048962772992"
+"google","irrelevant","126487986425696259"
+"google","irrelevant","126487974975242240"
+"google","irrelevant","126487943509581824"
+"google","irrelevant","126487924698128384"
+"google","irrelevant","126487924249329664"
+"google","irrelevant","126487911465095169"
+"google","irrelevant","126487879139590144"
+"google","irrelevant","126487855362088961"
+"google","irrelevant","126487852824526848"
+"google","irrelevant","126487831572004864"
+"google","irrelevant","126487783077449728"
+"google","irrelevant","126487738294861824"
+"google","irrelevant","126487624444674048"
+"google","irrelevant","126487457381359617"
+"google","irrelevant","126487454797668353"
+"google","irrelevant","126487422249861120"
+"google","irrelevant","126487408500940800"
+"google","irrelevant","126487406131150848"
+"google","irrelevant","126487403220320258"
+"google","irrelevant","126487385461633024"
+"google","irrelevant","126487318650568704"
+"google","irrelevant","126487165969510400"
+"google","irrelevant","126487155886407680"
+"google","irrelevant","126487143555153920"
+"google","irrelevant","126487140317147136"
+"google","irrelevant","126486968682037248"
+"google","irrelevant","126486956149448704"
+"google","irrelevant","126486814520381440"
+"google","irrelevant","126486790818373632"
+"google","irrelevant","126486753598119936"
+"google","irrelevant","126486526631743488"
+"google","irrelevant","126486211824058368"
+"google","irrelevant","126486125874384896"
+"google","irrelevant","126485882265018368"
+"google","irrelevant","126485702056751105"
+"google","irrelevant","126485474016628736"
+"google","irrelevant","126484568239906817"
+"google","irrelevant","126484213737340928"
+"google","irrelevant","126484000075292672"
+"microsoft","positive","126803641486163969"
+"microsoft","positive","126792129832951808"
+"microsoft","positive","126788430679113728"
+"microsoft","positive","126780006964805632"
+"microsoft","positive","126779403605770241"
+"microsoft","positive","126779217911349248"
+"microsoft","positive","126774092274741248"
+"microsoft","positive","126768366345138176"
+"microsoft","positive","126767508253454336"
+"microsoft","positive","126764085609111552"
+"microsoft","positive","126762337087655936"
+"microsoft","positive","126755785391869954"
+"microsoft","positive","126755232393867264"
+"microsoft","positive","126754593714606085"
+"microsoft","positive","126754500278104064"
+"microsoft","positive","126750973329817601"
+"microsoft","positive","126749591956762624"
+"microsoft","positive","126748497096622080"
+"microsoft","positive","126748156003221504"
+"microsoft","positive","126748155021762561"
+"microsoft","positive","126748153952206849"
+"microsoft","positive","126747965393084416"
+"microsoft","positive","126747962817781760"
+"microsoft","positive","126747960900984832"
+"microsoft","positive","126744770713362432"
+"microsoft","positive","126743288320491521"
+"microsoft","positive","126742063961214976"
+"microsoft","positive","126741671965769728"
+"microsoft","positive","126738939594813440"
+"microsoft","positive","126736431929507840"
+"microsoft","positive","126735438948995072"
+"microsoft","positive","126734527551913984"
+"microsoft","positive","126732577301217280"
+"microsoft","positive","126732240368570369"
+"microsoft","positive","126732148144209920"
+"microsoft","positive","126731873517965313"
+"microsoft","positive","126730153454870529"
+"microsoft","positive","126729713568849920"
+"microsoft","positive","126725535677157376"
+"microsoft","positive","126725332031127552"
+"microsoft","positive","126721324042305536"
+"microsoft","positive","126716103123673088"
+"microsoft","positive","126715314007314434"
+"microsoft","positive","126714241427312641"
+"microsoft","positive","126709513947594753"
+"microsoft","positive","126705831126384640"
+"microsoft","positive","126695555685560320"
+"microsoft","positive","126693834846515200"
+"microsoft","positive","126692533869871106"
+"microsoft","positive","126691809417113600"
+"microsoft","positive","126689578886246400"
+"microsoft","positive","126689007512993792"
+"microsoft","positive","126688284343672832"
+"microsoft","positive","126688181688078338"
+"microsoft","positive","126685198531297281"
+"microsoft","positive","126677986522054657"
+"microsoft","positive","126674853230149632"
+"microsoft","positive","126670032951443456"
+"microsoft","positive","126669652469350401"
+"microsoft","positive","126668529046007808"
+"microsoft","positive","126665091381854208"
+"microsoft","positive","126663720075141121"
+"microsoft","positive","126659125751971840"
+"microsoft","positive","126658961263951873"
+"microsoft","positive","126658937155108866"
+"microsoft","positive","126658528965439488"
+"microsoft","positive","126650473322262529"
+"microsoft","positive","126638821948403712"
+"microsoft","positive","126637126010929152"
+"microsoft","positive","126633708315873280"
+"microsoft","positive","126629320948060161"
+"microsoft","positive","126611718376919041"
+"microsoft","positive","126610651916410881"
+"microsoft","positive","126608712407322624"
+"microsoft","positive","126605340270788608"
+"microsoft","positive","126603756971360256"
+"microsoft","positive","126599751402668032"
+"microsoft","positive","126596256138137600"
+"microsoft","positive","126583935139454976"
+"microsoft","positive","126581165636333568"
+"microsoft","positive","126579574921371648"
+"microsoft","positive","126578340902617088"
+"microsoft","positive","126570919513686018"
+"microsoft","positive","126559269603647488"
+"microsoft","positive","126555956975910912"
+"microsoft","positive","126553559847288832"
+"microsoft","positive","126552645497405440"
+"microsoft","positive","126532025552347136"
+"microsoft","positive","126529895923843072"
+"microsoft","positive","126499160995282944"
+"microsoft","positive","126494986983325696"
+"microsoft","positive","126492789939765248"
+"microsoft","positive","126479912407273472"
+"microsoft","negative","126802977813037057"
+"microsoft","negative","126795554079510528"
+"microsoft","negative","126793207613894659"
+"microsoft","negative","126790861831938048"
+"microsoft","negative","126789739180326913"
+"microsoft","negative","126788223409197058"
+"microsoft","negative","126786271921184768"
+"microsoft","negative","126784810755690496"
+"microsoft","negative","126782820709441536"
+"microsoft","negative","126781543879421952"
+"microsoft","negative","126778114385772548"
+"microsoft","negative","126771575772020736"
+"microsoft","negative","126771575654596608"
+"microsoft","negative","126771575591665665"
+"microsoft","negative","126766935634485249"
+"microsoft","negative","126763744557662209"
+"microsoft","negative","126763108080427009"
+"microsoft","negative","126761988608098304"
+"microsoft","negative","126759080860725248"
+"microsoft","negative","126758070176059392"
+"microsoft","negative","126754989409452032"
+"microsoft","negative","126754753697943552"
+"microsoft","negative","126753683630333952"
+"microsoft","negative","126750481069510656"
+"microsoft","negative","126748219261726721"
+"microsoft","negative","126748091083788288"
+"microsoft","negative","126747859960856576"
+"microsoft","negative","126747247563112448"
+"microsoft","negative","126746946093330434"
+"microsoft","negative","126744670465306624"
+"microsoft","negative","126744386137624576"
+"microsoft","negative","126744132763910144"
+"microsoft","negative","126744130784198656"
+"microsoft","negative","126744129135845377"
+"microsoft","negative","126743725677346816"
+"microsoft","negative","126740373883191296"
+"microsoft","negative","126738867087884288"
+"microsoft","negative","126730748882460672"
+"microsoft","negative","126730580934135809"
+"microsoft","negative","126730265551843329"
+"microsoft","negative","126727758108823553"
+"microsoft","negative","126726935572262912"
+"microsoft","negative","126726458776358913"
+"microsoft","negative","126724991495569408"
+"microsoft","negative","126722961381134336"
+"microsoft","negative","126722095743893504"
+"microsoft","negative","126720736848117760"
+"microsoft","negative","126719328035942400"
+"microsoft","negative","126718833493938176"
+"microsoft","negative","126716806688804865"
+"microsoft","negative","126714471543619584"
+"microsoft","negative","126712110095925250"
+"microsoft","negative","126705293055889408"
+"microsoft","negative","126704033779023872"
+"microsoft","negative","126700315507572736"
+"microsoft","negative","126700014385897472"
+"microsoft","negative","126692062757269505"
+"microsoft","negative","126691815394000896"
+"microsoft","negative","126690175656001538"
+"microsoft","negative","126689051960033280"
+"microsoft","negative","126686283769712640"
+"microsoft","negative","126686075287642112"
+"microsoft","negative","126682137972834305"
+"microsoft","negative","126681641388216320"
+"microsoft","negative","126681389570596865"
+"microsoft","negative","126680630087008257"
+"microsoft","negative","126679134817624066"
+"microsoft","negative","126677032837971968"
+"microsoft","negative","126675041353076738"
+"microsoft","negative","126672651459633152"
+"microsoft","negative","126671856404144128"
+"microsoft","negative","126668133405696000"
+"microsoft","negative","126666768541421571"
+"microsoft","negative","126666110237032448"
+"microsoft","negative","126662601139695616"
+"microsoft","negative","126647829656641536"
+"microsoft","negative","126647697800310784"
+"microsoft","negative","126647264943943682"
+"microsoft","negative","126646630702260224"
+"microsoft","negative","126645939208327168"
+"microsoft","negative","126644987474608128"
+"microsoft","negative","126644615280463872"
+"microsoft","negative","126643061567668224"
+"microsoft","negative","126642517310570497"
+"microsoft","negative","126642328466243584"
+"microsoft","negative","126642036572041216"
+"microsoft","negative","126640632478445568"
+"microsoft","negative","126637450008346624"
+"microsoft","negative","126636886977556480"
+"microsoft","negative","126636535813636096"
+"microsoft","negative","126635317108289536"
+"microsoft","negative","126628406258450432"
+"microsoft","negative","126622538557177856"
+"microsoft","negative","126618374972248064"
+"microsoft","negative","126615502259879938"
+"microsoft","negative","126614764339212288"
+"microsoft","negative","126606967736238080"
+"microsoft","negative","126606323625361408"
+"microsoft","negative","126603861933817856"
+"microsoft","negative","126602276872794112"
+"microsoft","negative","126602276805685248"
+"microsoft","negative","126599337361932288"
+"microsoft","negative","126595347198902272"
+"microsoft","negative","126594799062102016"
+"microsoft","negative","126586083470360576"
+"microsoft","negative","126583473929588736"
+"microsoft","negative","126579470424473600"
+"microsoft","negative","126579121303207936"
+"microsoft","negative","126577595553824770"
+"microsoft","negative","126574432159408129"
+"microsoft","negative","126573688941318144"
+"microsoft","negative","126573680665964544"
+"microsoft","negative","126572762411171840"
+"microsoft","negative","126572362316513280"
+"microsoft","negative","126570851389800448"
+"microsoft","negative","126570732430966785"
+"microsoft","negative","126570180070481920"
+"microsoft","negative","126567507350913024"
+"microsoft","negative","126566106252062720"
+"microsoft","negative","126565636703924225"
+"microsoft","negative","126564844211154944"
+"microsoft","negative","126560723794010112"
+"microsoft","negative","126558437017530368"
+"microsoft","negative","126546665342640128"
+"microsoft","negative","126546043193147394"
+"microsoft","negative","126543225501650944"
+"microsoft","negative","126525932625334272"
+"microsoft","negative","126519630868119552"
+"microsoft","negative","126518620095393792"
+"microsoft","negative","126517164416049152"
+"microsoft","negative","126515911321591808"
+"microsoft","negative","126513363529383937"
+"microsoft","negative","126510880761458688"
+"microsoft","negative","126508179281547264"
+"microsoft","negative","126504435227701248"
+"microsoft","negative","126488915996721153"
+"microsoft","negative","126485347935862784"
+"microsoft","negative","126484955047006209"
+"microsoft","neutral","126809228194217984"
+"microsoft","neutral","126808500356644864"
+"microsoft","neutral","126808393892634625"
+"microsoft","neutral","126805369619087360"
+"microsoft","neutral","126805239209803777"
+"microsoft","neutral","126804966491955201"
+"microsoft","neutral","126804937438003200"
+"microsoft","neutral","126804497493266433"
+"microsoft","neutral","126803763603312640"
+"microsoft","neutral","126803594216341506"
+"microsoft","neutral","126803513190789121"
+"microsoft","neutral","126803044426985472"
+"microsoft","neutral","126802586178293760"
+"microsoft","neutral","126802366996561920"
+"microsoft","neutral","126800882477174784"
+"microsoft","neutral","126800407585501184"
+"microsoft","neutral","126800313029099520"
+"microsoft","neutral","126799514270040064"
+"microsoft","neutral","126798908335734785"
+"microsoft","neutral","126798906129526784"
+"microsoft","neutral","126797598425223169"
+"microsoft","neutral","126797364823457792"
+"microsoft","neutral","126797364433387520"
+"microsoft","neutral","126797363418370048"
+"microsoft","neutral","126797339758305280"
+"microsoft","neutral","126797209244151808"
+"microsoft","neutral","126797207159582720"
+"microsoft","neutral","126797206232645635"
+"microsoft","neutral","126796929366638592"
+"microsoft","neutral","126795614649466880"
+"microsoft","neutral","126795256225210368"
+"microsoft","neutral","126795026771607553"
+"microsoft","neutral","126795013752504320"
+"microsoft","neutral","126795009986002946"
+"microsoft","neutral","126794825998663680"
+"microsoft","neutral","126793516398546945"
+"microsoft","neutral","126792687138508801"
+"microsoft","neutral","126792112313352192"
+"microsoft","neutral","126791726559010816"
+"microsoft","neutral","126791479925555200"
+"microsoft","neutral","126790637243740160"
+"microsoft","neutral","126789918826565632"
+"microsoft","neutral","126789806905769984"
+"microsoft","neutral","126789806356303872"
+"microsoft","neutral","126789710705213440"
+"microsoft","neutral","126789040015020033"
+"microsoft","neutral","126788856476471296"
+"microsoft","neutral","126788488183037953"
+"microsoft","neutral","126786825913245698"
+"microsoft","neutral","126786517531242496"
+"microsoft","neutral","126786021340884992"
+"microsoft","neutral","126785035599749121"
+"microsoft","neutral","126784430185521154"
+"microsoft","neutral","126782184748097536"
+"microsoft","neutral","126781699811061760"
+"microsoft","neutral","126781384600727552"
+"microsoft","neutral","126781241423962112"
+"microsoft","neutral","126780592036646913"
+"microsoft","neutral","126779798986047488"
+"microsoft","neutral","126779783760723968"
+"microsoft","neutral","126779778090008576"
+"microsoft","neutral","126779040282587138"
+"microsoft","neutral","126778210783473665"
+"microsoft","neutral","126776771159916546"
+"microsoft","neutral","126775752610942976"
+"microsoft","neutral","126774185577021445"
+"microsoft","neutral","126773807875751936"
+"microsoft","neutral","126773769262997504"
+"microsoft","neutral","126773680196947968"
+"microsoft","neutral","126773504644349954"
+"microsoft","neutral","126773458519605248"
+"microsoft","neutral","126772660024774656"
+"microsoft","neutral","126772118879879169"
+"microsoft","neutral","126771594470232064"
+"microsoft","neutral","126769762452770819"
+"microsoft","neutral","126768953027608576"
+"microsoft","neutral","126768749243146240"
+"microsoft","neutral","126768739692720128"
+"microsoft","neutral","126768259734315008"
+"microsoft","neutral","126768125386571776"
+"microsoft","neutral","126768055769513984"
+"microsoft","neutral","126768000949948416"
+"microsoft","neutral","126767824386531329"
+"microsoft","neutral","126767467077976065"
+"microsoft","neutral","126767049312714752"
+"microsoft","neutral","126766670109872128"
+"microsoft","neutral","126766092629712896"
+"microsoft","neutral","126765064299614209"
+"microsoft","neutral","126764662510452736"
+"microsoft","neutral","126764605623111680"
+"microsoft","neutral","126764604218028033"
+"microsoft","neutral","126762652402860033"
+"microsoft","neutral","126762576557248512"
+"microsoft","neutral","126761994098442241"
+"microsoft","neutral","126761880801910784"
+"microsoft","neutral","126761759041265664"
+"microsoft","neutral","126761312016547840"
+"microsoft","neutral","126761103677071362"
+"microsoft","neutral","126761034341040128"
+"microsoft","neutral","126758826337771520"
+"microsoft","neutral","126758715981434881"
+"microsoft","neutral","126756834257285120"
+"microsoft","neutral","126755580604981249"
+"microsoft","neutral","126755433108082688"
+"microsoft","neutral","126755073664618496"
+"microsoft","neutral","126754216160145409"
+"microsoft","neutral","126753468160540672"
+"microsoft","neutral","126752950654746624"
+"microsoft","neutral","126752714385391616"
+"microsoft","neutral","126752712300826624"
+"microsoft","neutral","126752262281371649"
+"microsoft","neutral","126751213692792832"
+"microsoft","neutral","126751180557778945"
+"microsoft","neutral","126750292212584449"
+"microsoft","neutral","126749618229878784"
+"microsoft","neutral","126749567428468736"
+"microsoft","neutral","126749486545506304"
+"microsoft","neutral","126749360745758722"
+"microsoft","neutral","126749318198730752"
+"microsoft","neutral","126748764869378048"
+"microsoft","neutral","126748146582818816"
+"microsoft","neutral","126747712082292736"
+"microsoft","neutral","126747117120274432"
+"microsoft","neutral","126746786806247424"
+"microsoft","neutral","126746562062848001"
+"microsoft","neutral","126746140791144448"
+"microsoft","neutral","126745920967680000"
+"microsoft","neutral","126745707414691840"
+"microsoft","neutral","126745457232846849"
+"microsoft","neutral","126745293378166784"
+"microsoft","neutral","126745275686600705"
+"microsoft","neutral","126745248121618434"
+"microsoft","neutral","126744229912379392"
+"microsoft","neutral","126743926903283713"
+"microsoft","neutral","126743901968150528"
+"microsoft","neutral","126743843348561920"
+"microsoft","neutral","126743100809945090"
+"microsoft","neutral","126743096896655362"
+"microsoft","neutral","126742966890004481"
+"microsoft","neutral","126742062635810817"
+"microsoft","neutral","126741965915168771"
+"microsoft","neutral","126741817365504000"
+"microsoft","neutral","126741679024783360"
+"microsoft","neutral","126741473726177280"
+"microsoft","neutral","126740991087611904"
+"microsoft","neutral","126740408926609409"
+"microsoft","neutral","126740066138722305"
+"microsoft","neutral","126739377232687105"
+"microsoft","neutral","126737802657402880"
+"microsoft","neutral","126737400658538496"
+"microsoft","neutral","126737368001683457"
+"microsoft","neutral","126737116049833984"
+"microsoft","neutral","126736393987817472"
+"microsoft","neutral","126735639508037632"
+"microsoft","neutral","126735624299487232"
+"microsoft","neutral","126735411811848192"
+"microsoft","neutral","126735013667545088"
+"microsoft","neutral","126734902514286593"
+"microsoft","neutral","126734632120102912"
+"microsoft","neutral","126734120058490881"
+"microsoft","neutral","126733687944515584"
+"microsoft","neutral","126733589328048128"
+"microsoft","neutral","126733404870942720"
+"microsoft","neutral","126732739083898881"
+"microsoft","neutral","126732384602296320"
+"microsoft","neutral","126732343254851584"
+"microsoft","neutral","126732316000264192"
+"microsoft","neutral","126731883659804672"
+"microsoft","neutral","126731759646810113"
+"microsoft","neutral","126729624217600001"
+"microsoft","neutral","126729179994656769"
+"microsoft","neutral","126728328358014980"
+"microsoft","neutral","126728277896347649"
+"microsoft","neutral","126727087770963968"
+"microsoft","neutral","126726863811903488"
+"microsoft","neutral","126726826923012096"
+"microsoft","neutral","126726199123771392"
+"microsoft","neutral","126725918629695489"
+"microsoft","neutral","126725691059351552"
+"microsoft","neutral","126725006729281536"
+"microsoft","neutral","126724433791549440"
+"microsoft","neutral","126724174378045440"
+"microsoft","neutral","126723741651714048"
+"microsoft","neutral","126723269326938112"
+"microsoft","neutral","126723018989907968"
+"microsoft","neutral","126722729444515840"
+"microsoft","neutral","126722549584379905"
+"microsoft","neutral","126722463265599488"
+"microsoft","neutral","126722284776980480"
+"microsoft","neutral","126722253579763712"
+"microsoft","neutral","126722158394228736"
+"microsoft","neutral","126722000549974016"
+"microsoft","neutral","126721061797629953"
+"microsoft","neutral","126720971515248641"
+"microsoft","neutral","126719738293391360"
+"microsoft","neutral","126719724376702976"
+"microsoft","neutral","126718237252648960"
+"microsoft","neutral","126717789829472256"
+"microsoft","neutral","126717208557649920"
+"microsoft","neutral","126716851706277889"
+"microsoft","neutral","126716781510410240"
+"microsoft","neutral","126716221575004160"
+"microsoft","neutral","126715906096238592"
+"microsoft","neutral","126715414716747777"
+"microsoft","neutral","126714453105446912"
+"microsoft","neutral","126713704099221504"
+"microsoft","neutral","126713393599086592"
+"microsoft","neutral","126713363689517056"
+"microsoft","neutral","126713356739547136"
+"microsoft","neutral","126713203240607744"
+"microsoft","neutral","126712975309537280"
+"microsoft","neutral","126711721376235520"
+"microsoft","neutral","126710782615494657"
+"microsoft","neutral","126710485604237313"
+"microsoft","neutral","126709729924874241"
+"microsoft","neutral","126709714565349376"
+"microsoft","neutral","126708211108683776"
+"microsoft","neutral","126708014081257472"
+"microsoft","neutral","126707947744145409"
+"microsoft","neutral","126707946955616256"
+"microsoft","neutral","126707601504346112"
+"microsoft","neutral","126707418536218624"
+"microsoft","neutral","126707389142544384"
+"microsoft","neutral","126706701968736256"
+"microsoft","neutral","126706701306052609"
+"microsoft","neutral","126706255413772288"
+"microsoft","neutral","126705857206562816"
+"microsoft","neutral","126705491685556225"
+"microsoft","neutral","126705440640860161"
+"microsoft","neutral","126705187942449152"
+"microsoft","neutral","126705130681794560"
+"microsoft","neutral","126704776506380288"
+"microsoft","neutral","126704323542523904"
+"microsoft","neutral","126704037738459136"
+"microsoft","neutral","126703842732683265"
+"microsoft","neutral","126703823837335552"
+"microsoft","neutral","126703372207271936"
+"microsoft","neutral","126702051001516032"
+"microsoft","neutral","126701413907701760"
+"microsoft","neutral","126701318978019328"
+"microsoft","neutral","126701312447496192"
+"microsoft","neutral","126701230331412480"
+"microsoft","neutral","126700724389281792"
+"microsoft","neutral","126700451163942912"
+"microsoft","neutral","126699934694129664"
+"microsoft","neutral","126699730028855296"
+"microsoft","neutral","126699437711040512"
+"microsoft","neutral","126699406765465601"
+"microsoft","neutral","126699405356171264"
+"microsoft","neutral","126699162879275008"
+"microsoft","neutral","126698863447916544"
+"microsoft","neutral","126698757503991808"
+"microsoft","neutral","126698756845481984"
+"microsoft","neutral","126698755884978177"
+"microsoft","neutral","126698717930717184"
+"microsoft","neutral","126698707252027392"
+"microsoft","neutral","126698704290852865"
+"microsoft","neutral","126698703410040834"
+"microsoft","neutral","126698504910413827"
+"microsoft","neutral","126698182141947904"
+"microsoft","neutral","126697870689710080"
+"microsoft","neutral","126697419953012737"
+"microsoft","neutral","126697038187474944"
+"microsoft","neutral","126696856959983616"
+"microsoft","neutral","126696843299135488"
+"microsoft","neutral","126696759077511168"
+"microsoft","neutral","126696757294927874"
+"microsoft","neutral","126696688046968832"
+"microsoft","neutral","126696671894704129"
+"microsoft","neutral","126696627955171328"
+"microsoft","neutral","126696501849227264"
+"microsoft","neutral","126696344881594368"
+"microsoft","neutral","126695550975356928"
+"microsoft","neutral","126695264487604224"
+"microsoft","neutral","126694893841158145"
+"microsoft","neutral","126694267560271872"
+"microsoft","neutral","126693945123147776"
+"microsoft","neutral","126693187346632704"
+"microsoft","neutral","126693021365452800"
+"microsoft","neutral","126692888741548032"
+"microsoft","neutral","126692854981595136"
+"microsoft","neutral","126692811859963904"
+"microsoft","neutral","126692357277102080"
+"microsoft","neutral","126691842736668673"
+"microsoft","neutral","126691830774505472"
+"microsoft","neutral","126691604839927808"
+"microsoft","neutral","126691578361298945"
+"microsoft","neutral","126691206976643072"
+"microsoft","neutral","126691146209558528"
+"microsoft","neutral","126691006207893505"
+"microsoft","neutral","126690823306870784"
+"microsoft","neutral","126690446872297473"
+"microsoft","neutral","126690445119070210"
+"microsoft","neutral","126690443588153345"
+"microsoft","neutral","126690396720988160"
+"microsoft","neutral","126690392568639488"
+"microsoft","neutral","126690357042884608"
+"microsoft","neutral","126690074946568192"
+"microsoft","neutral","126689592832294913"
+"microsoft","neutral","126689002114924544"
+"microsoft","neutral","126688185228079104"
+"microsoft","neutral","126688055691190274"
+"microsoft","neutral","126687847762771970"
+"microsoft","neutral","126686806585520128"
+"microsoft","neutral","126686450019344384"
+"microsoft","neutral","126686367479631872"
+"microsoft","neutral","126686111656448000"
+"microsoft","neutral","126686012884787200"
+"microsoft","neutral","126685318899449856"
+"microsoft","neutral","126685194278289408"
+"microsoft","neutral","126685077823422466"
+"microsoft","neutral","126684141306650625"
+"microsoft","neutral","126683940764401665"
+"microsoft","neutral","126683909722357760"
+"microsoft","neutral","126683288239751169"
+"microsoft","neutral","126683225123852288"
+"microsoft","neutral","126681783222808578"
+"microsoft","neutral","126680710403719168"
+"microsoft","neutral","126680178196877312"
+"microsoft","neutral","126680085959942144"
+"microsoft","neutral","126679050742800385"
+"microsoft","neutral","126678934237626368"
+"microsoft","neutral","126678725491294209"
+"microsoft","neutral","126678690334638080"
+"microsoft","neutral","126678683393081344"
+"microsoft","neutral","126678622449831937"
+"microsoft","neutral","126678492464152576"
+"microsoft","neutral","126678454639927297"
+"microsoft","neutral","126677952644648960"
+"microsoft","neutral","126677913058820097"
+"microsoft","neutral","126677721647554561"
+"microsoft","neutral","126677403480231937"
+"microsoft","neutral","126677050265305088"
+"microsoft","neutral","126676455936622593"
+"microsoft","neutral","126676302873899008"
+"microsoft","neutral","126675994378637312"
+"microsoft","neutral","126675231896117248"
+"microsoft","neutral","126674948671545344"
+"microsoft","neutral","126674919969923072"
+"microsoft","neutral","126674748674547712"
+"microsoft","neutral","126674460131606529"
+"microsoft","neutral","126673949907107840"
+"microsoft","neutral","126673753680773122"
+"microsoft","neutral","126673364231270400"
+"microsoft","neutral","126672797731790848"
+"microsoft","neutral","126671949320556544"
+"microsoft","neutral","126671882211692545"
+"microsoft","neutral","126671664011419648"
+"microsoft","neutral","126671654540677122"
+"microsoft","neutral","126671006302617600"
+"microsoft","neutral","126669870090829824"
+"microsoft","neutral","126669451369263106"
+"microsoft","neutral","126668947503325184"
+"microsoft","neutral","126668764199665664"
+"microsoft","neutral","126668119354781696"
+"microsoft","neutral","126667815116750848"
+"microsoft","neutral","126667274080894976"
+"microsoft","neutral","126666679785766914"
+"microsoft","neutral","126666425715798016"
+"microsoft","neutral","126665951172243456"
+"microsoft","neutral","126665581519835136"
+"microsoft","neutral","126664658651332608"
+"microsoft","neutral","126664524156764160"
+"microsoft","neutral","126664410029764608"
+"microsoft","neutral","126664404891746304"
+"microsoft","neutral","126663746767699969"
+"microsoft","neutral","126662968434900992"
+"microsoft","neutral","126662905084129280"
+"microsoft","neutral","126661971566264320"
+"microsoft","neutral","126660704496402433"
+"microsoft","neutral","126659682076082176"
+"microsoft","neutral","126659577033928704"
+"microsoft","neutral","126659465985536000"
+"microsoft","neutral","126659266315698177"
+"microsoft","neutral","126658871363239936"
+"microsoft","neutral","126658110084489216"
+"microsoft","neutral","126657343965507584"
+"microsoft","neutral","126656580715429888"
+"microsoft","neutral","126656236157538304"
+"microsoft","neutral","126656052971323393"
+"microsoft","neutral","126655887740903424"
+"microsoft","neutral","126655887178870784"
+"microsoft","neutral","126655373015924736"
+"microsoft","neutral","126654466735878144"
+"microsoft","neutral","126653861371973632"
+"microsoft","neutral","126653619327082496"
+"microsoft","neutral","126653015754149888"
+"microsoft","neutral","126652714804461569"
+"microsoft","neutral","126652661251571712"
+"microsoft","neutral","126652256404779008"
+"microsoft","neutral","126651931287498752"
+"microsoft","neutral","126651699380232193"
+"microsoft","neutral","126651497718095873"
+"microsoft","neutral","126651446589526016"
+"microsoft","neutral","126651412443693057"
+"microsoft","neutral","126651402549346304"
+"microsoft","neutral","126651401010028544"
+"microsoft","neutral","126651399730769922"
+"microsoft","neutral","126651363718467585"
+"microsoft","neutral","126651360602112000"
+"microsoft","neutral","126650859290501121"
+"microsoft","neutral","126650839279476736"
+"microsoft","neutral","126650651370467328"
+"microsoft","neutral","126649108202143744"
+"microsoft","neutral","126647567877541889"
+"microsoft","neutral","126647558469722112"
+"microsoft","neutral","126646015863427072"
+"microsoft","neutral","126645991947513857"
+"microsoft","neutral","126645459568705536"
+"microsoft","neutral","126644332177530880"
+"microsoft","neutral","126644191299252224"
+"microsoft","neutral","126643947190759425"
+"microsoft","neutral","126642779064504320"
+"microsoft","neutral","126642612412227585"
+"microsoft","neutral","126642611598540801"
+"microsoft","neutral","126642532863049728"
+"microsoft","neutral","126642137424076800"
+"microsoft","neutral","126641727254695937"
+"microsoft","neutral","126641223229386752"
+"microsoft","neutral","126640775730700288"
+"microsoft","neutral","126640203828969472"
+"microsoft","neutral","126638865971806209"
+"microsoft","neutral","126638751307931648"
+"microsoft","neutral","126638261421613056"
+"microsoft","neutral","126637865550618624"
+"microsoft","neutral","126637677335429121"
+"microsoft","neutral","126637516148318208"
+"microsoft","neutral","126637437953900546"
+"microsoft","neutral","126637411114553344"
+"microsoft","neutral","126636379479343106"
+"microsoft","neutral","126636379324170240"
+"microsoft","neutral","126636194653147136"
+"microsoft","neutral","126635954520854528"
+"microsoft","neutral","126635633245564928"
+"microsoft","neutral","126635573921316864"
+"microsoft","neutral","126635080100757504"
+"microsoft","neutral","126635053139763200"
+"microsoft","neutral","126634784326828032"
+"microsoft","neutral","126634780954595328"
+"microsoft","neutral","126634396773130240"
+"microsoft","neutral","126632917643427841"
+"microsoft","neutral","126632860386988033"
+"microsoft","neutral","126632280985845760"
+"microsoft","neutral","126631882149474305"
+"microsoft","neutral","126631805288849408"
+"microsoft","neutral","126631188394811392"
+"microsoft","neutral","126631077468049408"
+"microsoft","neutral","126630800413310976"
+"microsoft","neutral","126630728690704384"
+"microsoft","neutral","126628957566156800"
+"microsoft","neutral","126628527155052544"
+"microsoft","neutral","126626502937812992"
+"microsoft","neutral","126626166261022720"
+"microsoft","neutral","126625578567729152"
+"microsoft","neutral","126625418118832128"
+"microsoft","neutral","126625193786490881"
+"microsoft","neutral","126622411587190784"
+"microsoft","neutral","126622356817981440"
+"microsoft","neutral","126622297917374464"
+"microsoft","neutral","126621573271650304"
+"microsoft","neutral","126621144177577984"
+"microsoft","neutral","126621142743138305"
+"microsoft","neutral","126619580096462848"
+"microsoft","neutral","126619293977812992"
+"microsoft","neutral","126619258099744768"
+"microsoft","neutral","126619256015167488"
+"microsoft","neutral","126617626435780608"
+"microsoft","neutral","126617142761230337"
+"microsoft","neutral","126616809079193600"
+"microsoft","neutral","126616636621996032"
+"microsoft","neutral","126615663119843328"
+"microsoft","neutral","126615649077301248"
+"microsoft","neutral","126615565673570305"
+"microsoft","neutral","126615350916816896"
+"microsoft","neutral","126614792092909568"
+"microsoft","neutral","126614365280542720"
+"microsoft","neutral","126614116927422464"
+"microsoft","neutral","126614037608935424"
+"microsoft","neutral","126613919623159808"
+"microsoft","neutral","126613753499353088"
+"microsoft","neutral","126613640278315008"
+"microsoft","neutral","126613630551719936"
+"microsoft","neutral","126613447487127553"
+"microsoft","neutral","126613275331919872"
+"microsoft","neutral","126611886979555328"
+"microsoft","neutral","126611785666134016"
+"microsoft","neutral","126611604925194240"
+"microsoft","neutral","126611557370183681"
+"microsoft","neutral","126611372552355841"
+"microsoft","neutral","126610800382181376"
+"microsoft","neutral","126610131671715840"
+"microsoft","neutral","126610091855192064"
+"microsoft","neutral","126610035655704577"
+"microsoft","neutral","126609904298491904"
+"microsoft","neutral","126609815278592000"
+"microsoft","neutral","126608763456200704"
+"microsoft","neutral","126607853640364032"
+"microsoft","neutral","126607793280126976"
+"microsoft","neutral","126607106953580546"
+"microsoft","neutral","126606671874228225"
+"microsoft","neutral","126606546556829696"
+"microsoft","neutral","126606023174791168"
+"microsoft","neutral","126604925881954305"
+"microsoft","neutral","126604903408869378"
+"microsoft","neutral","126604828150480896"
+"microsoft","neutral","126604631894790144"
+"microsoft","neutral","126604425832837120"
+"microsoft","neutral","126604425635692545"
+"microsoft","neutral","126604152536178688"
+"microsoft","neutral","126603598724476928"
+"microsoft","neutral","126602791807496192"
+"microsoft","neutral","126602212511203328"
+"microsoft","neutral","126601735417499648"
+"microsoft","neutral","126601553409875968"
+"microsoft","neutral","126600989150158849"
+"microsoft","neutral","126600074825445376"
+"microsoft","neutral","126599980847869952"
+"microsoft","neutral","126599671400497153"
+"microsoft","neutral","126599087389806592"
+"microsoft","neutral","126598871760642048"
+"microsoft","neutral","126598867847348224"
+"microsoft","neutral","126598515248992257"
+"microsoft","neutral","126597883477762048"
+"microsoft","neutral","126597538794057728"
+"microsoft","neutral","126596825875611648"
+"microsoft","neutral","126596088999313408"
+"microsoft","neutral","126596045575684096"
+"microsoft","neutral","126595447455367168"
+"microsoft","neutral","126595014846459904"
+"microsoft","neutral","126593877321842690"
+"microsoft","neutral","126593636627513344"
+"microsoft","neutral","126593450253623297"
+"microsoft","neutral","126592300070608896"
+"microsoft","neutral","126592158802264064"
+"microsoft","neutral","126592000395984896"
+"microsoft","neutral","126591976408748032"
+"microsoft","neutral","126591072884359168"
+"microsoft","neutral","126591029993406464"
+"microsoft","neutral","126590959290032128"
+"microsoft","neutral","126590231137886209"
+"microsoft","neutral","126588750628257792"
+"microsoft","neutral","126588693577338881"
+"microsoft","neutral","126588643782574080"
+"microsoft","neutral","126588145549574144"
+"microsoft","neutral","126586994536091648"
+"microsoft","neutral","126586306464727041"
+"microsoft","neutral","126585952394166272"
+"microsoft","neutral","126585097297199104"
+"microsoft","neutral","126584640453611521"
+"microsoft","neutral","126583805229285376"
+"microsoft","neutral","126582210420674560"
+"microsoft","neutral","126582173787619328"
+"microsoft","neutral","126581768529788929"
+"microsoft","neutral","126581659612094464"
+"microsoft","neutral","126581308158779392"
+"microsoft","neutral","126581270435201024"
+"microsoft","neutral","126580539636449281"
+"microsoft","neutral","126580273965047810"
+"microsoft","neutral","126580014819983360"
+"microsoft","neutral","126579602524082176"
+"microsoft","neutral","126579169646751744"
+"microsoft","neutral","126579166589108224"
+"microsoft","neutral","126577845421096960"
+"microsoft","neutral","126577204078456832"
+"microsoft","neutral","126577114005782528"
+"microsoft","neutral","126577085081849856"
+"microsoft","neutral","126576350067818497"
+"microsoft","neutral","126576121784442880"
+"microsoft","neutral","126573645421228032"
+"microsoft","neutral","126573265178202112"
+"microsoft","neutral","126573186774089728"
+"microsoft","neutral","126572513483436032"
+"microsoft","neutral","126572384126894080"
+"microsoft","neutral","126572306230288385"
+"microsoft","neutral","126572275876106240"
+"microsoft","neutral","126572275016286208"
+"microsoft","neutral","126570339105914881"
+"microsoft","neutral","126569564963217408"
+"microsoft","neutral","126569115082166273"
+"microsoft","neutral","126568785921572864"
+"microsoft","neutral","126568308584628224"
+"microsoft","neutral","126568122261061632"
+"microsoft","neutral","126567868065263616"
+"microsoft","neutral","126567546165014528"
+"microsoft","neutral","126566918911041536"
+"microsoft","neutral","126566540010192896"
+"microsoft","neutral","126566417276469248"
+"microsoft","neutral","126566305716387840"
+"microsoft","neutral","126565140702298112"
+"microsoft","neutral","126564015873867777"
+"microsoft","neutral","126564004612161536"
+"microsoft","neutral","126563375189725184"
+"microsoft","neutral","126563374384422912"
+"microsoft","neutral","126563373230997504"
+"microsoft","neutral","126563286392123392"
+"microsoft","neutral","126562736242040833"
+"microsoft","neutral","126562457375350785"
+"microsoft","neutral","126561633978945536"
+"microsoft","neutral","126561530258001920"
+"microsoft","neutral","126561428319649793"
+"microsoft","neutral","126559900036894720"
+"microsoft","neutral","126559880860545024"
+"microsoft","neutral","126557628531875840"
+"microsoft","neutral","126557171742810112"
+"microsoft","neutral","126555866332798976"
+"microsoft","neutral","126554531713650688"
+"microsoft","neutral","126553189511208960"
+"microsoft","neutral","126552864876269568"
+"microsoft","neutral","126552665063829504"
+"microsoft","neutral","126549688857010177"
+"microsoft","neutral","126548837958557696"
+"microsoft","neutral","126545396913815552"
+"microsoft","neutral","126545157708451842"
+"microsoft","neutral","126542911654477824"
+"microsoft","neutral","126542412226105345"
+"microsoft","neutral","126541046615244801"
+"microsoft","neutral","126538706944401409"
+"microsoft","neutral","126537896999124992"
+"microsoft","neutral","126536228538564608"
+"microsoft","neutral","126536172867559424"
+"microsoft","neutral","126535951764819968"
+"microsoft","neutral","126534265205829632"
+"microsoft","neutral","126534195777519616"
+"microsoft","neutral","126532191831343105"
+"microsoft","neutral","126528995394199552"
+"microsoft","neutral","126528296354725890"
+"microsoft","neutral","126528197750829056"
+"microsoft","neutral","126526992609849344"
+"microsoft","neutral","126525976925585408"
+"microsoft","neutral","126524240324337664"
+"microsoft","neutral","126523034906529793"
+"microsoft","neutral","126522766261366784"
+"microsoft","neutral","126522655892447233"
+"microsoft","neutral","126519858035822594"
+"microsoft","neutral","126519595682119681"
+"microsoft","neutral","126514963924787201"
+"microsoft","neutral","126514187647201280"
+"microsoft","neutral","126508567053340672"
+"microsoft","neutral","126508512930050048"
+"microsoft","neutral","126507878382174208"
+"microsoft","neutral","126507753484193792"
+"microsoft","neutral","126507677919617024"
+"microsoft","neutral","126507292777652224"
+"microsoft","neutral","126506577946615808"
+"microsoft","neutral","126500912578564097"
+"microsoft","neutral","126500486047207425"
+"microsoft","neutral","126500411522809856"
+"microsoft","neutral","126499066229170176"
+"microsoft","neutral","126498215204892672"
+"microsoft","neutral","126496197220380672"
+"microsoft","neutral","126494059987603456"
+"microsoft","neutral","126493220279558144"
+"microsoft","neutral","126488619002236928"
+"microsoft","neutral","126487011849809920"
+"microsoft","neutral","126485244651126784"
+"microsoft","neutral","126484490049687552"
+"microsoft","neutral","126483887923793920"
+"microsoft","neutral","126483490911952896"
+"microsoft","neutral","126483128163373057"
+"microsoft","neutral","126482131412189184"
+"microsoft","neutral","126481856639143936"
+"microsoft","neutral","126481810803793921"
+"microsoft","neutral","126481496147111936"
+"microsoft","neutral","126480696075227137"
+"microsoft","neutral","126479554217910272"
+"microsoft","neutral","126479285702762496"
+"microsoft","neutral","126476071238508545"
+"microsoft","neutral","126474577411969024"
+"microsoft","neutral","126470128589217795"
+"microsoft","neutral","126467278144413696"
+"microsoft","neutral","126466763746574337"
+"microsoft","irrelevant","126807138587783168"
+"microsoft","irrelevant","126806596662726660"
+"microsoft","irrelevant","126806460146524160"
+"microsoft","irrelevant","126801012852916224"
+"microsoft","irrelevant","126800980783271936"
+"microsoft","irrelevant","126800969521577984"
+"microsoft","irrelevant","126800228560019456"
+"microsoft","irrelevant","126798811262763009"
+"microsoft","irrelevant","126798048289488896"
+"microsoft","irrelevant","126796467213058048"
+"microsoft","irrelevant","126794558456610816"
+"microsoft","irrelevant","126794506266882048"
+"microsoft","irrelevant","126794057979670529"
+"microsoft","irrelevant","126792436176531456"
+"microsoft","irrelevant","126792221730148353"
+"microsoft","irrelevant","126792156852666369"
+"microsoft","irrelevant","126789413706543104"
+"microsoft","irrelevant","126788967692648448"
+"microsoft","irrelevant","126788442356064256"
+"microsoft","irrelevant","126787343704260608"
+"microsoft","irrelevant","126787220022640640"
+"microsoft","irrelevant","126787152708255744"
+"microsoft","irrelevant","126786658866700289"
+"microsoft","irrelevant","126786402837995521"
+"microsoft","irrelevant","126785814876274688"
+"microsoft","irrelevant","126785710920441858"
+"microsoft","irrelevant","126785377863340033"
+"microsoft","irrelevant","126785355310579712"
+"microsoft","irrelevant","126784813066756096"
+"microsoft","irrelevant","126783417152053248"
+"microsoft","irrelevant","126782732373196800"
+"microsoft","irrelevant","126781181466378240"
+"microsoft","irrelevant","126780041857216512"
+"microsoft","irrelevant","126779541720010752"
+"microsoft","irrelevant","126779286458871809"
+"microsoft","irrelevant","126777498716479488"
+"microsoft","irrelevant","126776818941440000"
+"microsoft","irrelevant","126775712525979648"
+"microsoft","irrelevant","126775015021625346"
+"microsoft","irrelevant","126775003927678976"
+"microsoft","irrelevant","126775002082189312"
+"microsoft","irrelevant","126774982935187456"
+"microsoft","irrelevant","126774793746907137"
+"microsoft","irrelevant","126774646715580416"
+"microsoft","irrelevant","126774646690414593"
+"microsoft","irrelevant","126774646568783872"
+"microsoft","irrelevant","126774645474070528"
+"microsoft","irrelevant","126774641044897793"
+"microsoft","irrelevant","126773569345699840"
+"microsoft","irrelevant","126773055816085504"
+"microsoft","irrelevant","126771371954020353"
+"microsoft","irrelevant","126771131884638208"
+"microsoft","irrelevant","126770870709534720"
+"microsoft","irrelevant","126770869254094849"
+"microsoft","irrelevant","126770866435530752"
+"microsoft","irrelevant","126769830568280065"
+"microsoft","irrelevant","126769633301774336"
+"microsoft","irrelevant","126769627484274688"
+"microsoft","irrelevant","126769625659748353"
+"microsoft","irrelevant","126769625626198016"
+"microsoft","irrelevant","126769625420673024"
+"microsoft","irrelevant","126769623591944192"
+"microsoft","irrelevant","126769621394137088"
+"microsoft","irrelevant","126769618458120192"
+"microsoft","irrelevant","126769617342435329"
+"microsoft","irrelevant","126769617057234944"
+"microsoft","irrelevant","126769617019486209"
+"microsoft","irrelevant","126769616826531840"
+"microsoft","irrelevant","126769452967669761"
+"microsoft","irrelevant","126768905216720896"
+"microsoft","irrelevant","126768623472750592"
+"microsoft","irrelevant","126768380945502208"
+"microsoft","irrelevant","126767886814543872"
+"microsoft","irrelevant","126767288652271617"
+"microsoft","irrelevant","126767180112076800"
+"microsoft","irrelevant","126765812534099969"
+"microsoft","irrelevant","126765752928829441"
+"microsoft","irrelevant","126765300904505344"
+"microsoft","irrelevant","126764864197758976"
+"microsoft","irrelevant","126764589835759616"
+"microsoft","irrelevant","126764578695680000"
+"microsoft","irrelevant","126764341784625152"
+"microsoft","irrelevant","126763862409232384"
+"microsoft","irrelevant","126763321541148672"
+"microsoft","irrelevant","126762153012236288"
+"microsoft","irrelevant","126762113980055552"
+"microsoft","irrelevant","126761498885361664"
+"microsoft","irrelevant","126761427343130624"
+"microsoft","irrelevant","126761080541290496"
+"microsoft","irrelevant","126760617800515584"
+"microsoft","irrelevant","126759986780057600"
+"microsoft","irrelevant","126759818718482432"
+"microsoft","irrelevant","126759115488903168"
+"microsoft","irrelevant","126758582631927809"
+"microsoft","irrelevant","126757586174345216"
+"microsoft","irrelevant","126756934937350144"
+"microsoft","irrelevant","126756701855682560"
+"microsoft","irrelevant","126756084961652736"
+"microsoft","irrelevant","126755997975977984"
+"microsoft","irrelevant","126755662792364032"
+"microsoft","irrelevant","126754698102439936"
+"microsoft","irrelevant","126754357671772160"
+"microsoft","irrelevant","126754317775552513"
+"microsoft","irrelevant","126753938098761728"
+"microsoft","irrelevant","126753907794919424"
+"microsoft","irrelevant","126752731380719616"
+"microsoft","irrelevant","126752590162698241"
+"microsoft","irrelevant","126752126880858112"
+"microsoft","irrelevant","126751890150137856"
+"microsoft","irrelevant","126751116061974528"
+"microsoft","irrelevant","126749587133308928"
+"microsoft","irrelevant","126749255498088448"
+"microsoft","irrelevant","126746850106675200"
+"microsoft","irrelevant","126746549756768258"
+"microsoft","irrelevant","126745825283031040"
+"microsoft","irrelevant","126745438136176640"
+"microsoft","irrelevant","126744626974564352"
+"microsoft","irrelevant","126743570689429504"
+"microsoft","irrelevant","126743504675282944"
+"microsoft","irrelevant","126743035903094785"
+"microsoft","irrelevant","126742038048804866"
+"microsoft","irrelevant","126741924446076928"
+"microsoft","irrelevant","126741919500992512"
+"microsoft","irrelevant","126740492540055552"
+"microsoft","irrelevant","126738342900539392"
+"microsoft","irrelevant","126737787335606273"
+"microsoft","irrelevant","126737489229656066"
+"microsoft","irrelevant","126736841872388096"
+"microsoft","irrelevant","126736717939093504"
+"microsoft","irrelevant","126736619742035968"
+"microsoft","irrelevant","126736441379274752"
+"microsoft","irrelevant","126734707038756864"
+"microsoft","irrelevant","126734622263476224"
+"microsoft","irrelevant","126734381309108224"
+"microsoft","irrelevant","126734290850557952"
+"microsoft","irrelevant","126733828000722944"
+"microsoft","irrelevant","126733528758095872"
+"microsoft","irrelevant","126733262822440960"
+"microsoft","irrelevant","126732894092791808"
+"microsoft","irrelevant","126732424460767233"
+"microsoft","irrelevant","126731601584455682"
+"microsoft","irrelevant","126731253490794496"
+"microsoft","irrelevant","126731136318713856"
+"microsoft","irrelevant","126730979485302784"
+"microsoft","irrelevant","126726825505329152"
+"microsoft","irrelevant","126726459246129152"
+"microsoft","irrelevant","126726358800936960"
+"microsoft","irrelevant","126726063484178432"
+"microsoft","irrelevant","126725627758915584"
+"microsoft","irrelevant","126725465611304960"
+"microsoft","irrelevant","126724685315579904"
+"microsoft","irrelevant","126724454649839616"
+"microsoft","irrelevant","126724248256528385"
+"microsoft","irrelevant","126723588324737024"
+"microsoft","irrelevant","126723323471204352"
+"microsoft","irrelevant","126723066528153600"
+"microsoft","irrelevant","126722796599521281"
+"microsoft","irrelevant","126722505128935424"
+"microsoft","irrelevant","126721828432519170"
+"microsoft","irrelevant","126719767590604801"
+"microsoft","irrelevant","126719569179054080"
+"microsoft","irrelevant","126719029921579008"
+"microsoft","irrelevant","126717715657396224"
+"microsoft","irrelevant","126717214211575808"
+"microsoft","irrelevant","126715806565412864"
+"microsoft","irrelevant","126715672981016577"
+"microsoft","irrelevant","126714068093509633"
+"microsoft","irrelevant","126713830184198144"
+"microsoft","irrelevant","126713264582299648"
+"microsoft","irrelevant","126710706354663424"
+"microsoft","irrelevant","126710657948196864"
+"microsoft","irrelevant","126710063497887744"
+"microsoft","irrelevant","126709780038434816"
+"microsoft","irrelevant","126709737222963200"
+"microsoft","irrelevant","126708721328992257"
+"microsoft","irrelevant","126708681239822336"
+"microsoft","irrelevant","126707839912771585"
+"microsoft","irrelevant","126707460244373504"
+"microsoft","irrelevant","126705840521613312"
+"microsoft","irrelevant","126705791527952384"
+"microsoft","irrelevant","126705448438079488"
+"microsoft","irrelevant","126705060708225024"
+"microsoft","irrelevant","126704896383787008"
+"microsoft","irrelevant","126704346573455360"
+"microsoft","irrelevant","126704316005351424"
+"microsoft","irrelevant","126704236057739264"
+"microsoft","irrelevant","126703003351785472"
+"microsoft","irrelevant","126702733834194944"
+"microsoft","irrelevant","126702673830481920"
+"microsoft","irrelevant","126702046350024704"
+"microsoft","irrelevant","126701862383661056"
+"microsoft","irrelevant","126701401425444864"
+"microsoft","irrelevant","126699572490813441"
+"microsoft","irrelevant","126699294987259904"
+"microsoft","irrelevant","126699236615127041"
+"microsoft","irrelevant","126698924709920768"
+"microsoft","irrelevant","126698924621832192"
+"microsoft","irrelevant","126698507657678848"
+"microsoft","irrelevant","126696228745523200"
+"microsoft","irrelevant","126695671163133952"
+"microsoft","irrelevant","126695665769250818"
+"microsoft","irrelevant","126694595026366464"
+"microsoft","irrelevant","126694584322490368"
+"microsoft","irrelevant","126693348735057920"
+"microsoft","irrelevant","126691436744810496"
+"microsoft","irrelevant","126690582893572096"
+"microsoft","irrelevant","126690506976657408"
+"microsoft","irrelevant","126689129131028480"
+"microsoft","irrelevant","126689124638932993"
+"microsoft","irrelevant","126689077700476929"
+"microsoft","irrelevant","126689077230698496"
+"microsoft","irrelevant","126688740826550272"
+"microsoft","irrelevant","126688659868106752"
+"microsoft","irrelevant","126687780943306752"
+"microsoft","irrelevant","126687120071999490"
+"microsoft","irrelevant","126687048647184384"
+"microsoft","irrelevant","126686733222944768"
+"microsoft","irrelevant","126686455752962048"
+"microsoft","irrelevant","126686446357716992"
+"microsoft","irrelevant","126686301780049920"
+"microsoft","irrelevant","126685380681547777"
+"microsoft","irrelevant","126685141174202369"
+"microsoft","irrelevant","126684849934303233"
+"microsoft","irrelevant","126684618605867008"
+"microsoft","irrelevant","126682886756777984"
+"microsoft","irrelevant","126682505033154560"
+"microsoft","irrelevant","126681644223578113"
+"microsoft","irrelevant","126681070413418496"
+"microsoft","irrelevant","126680181359378432"
+"microsoft","irrelevant","126680158508810240"
+"microsoft","irrelevant","126679552310251521"
+"microsoft","irrelevant","126679463839801344"
+"microsoft","irrelevant","126679305169289216"
+"microsoft","irrelevant","126679060431634432"
+"microsoft","irrelevant","126679053347467264"
+"microsoft","irrelevant","126678520033325057"
+"microsoft","irrelevant","126678376277749760"
+"microsoft","irrelevant","126678301539446784"
+"microsoft","irrelevant","126677890531201024"
+"microsoft","irrelevant","126677821492961280"
+"microsoft","irrelevant","126677668933533696"
+"microsoft","irrelevant","126677325008994306"
+"microsoft","irrelevant","126676840181022720"
+"microsoft","irrelevant","126676566154555395"
+"microsoft","irrelevant","126676435988512768"
+"microsoft","irrelevant","126675755026493440"
+"microsoft","irrelevant","126675459353223168"
+"microsoft","irrelevant","126675434065764352"
+"microsoft","irrelevant","126675392663789569"
+"microsoft","irrelevant","126675085758173187"
+"microsoft","irrelevant","126674938076725248"
+"microsoft","irrelevant","126673983235035138"
+"microsoft","irrelevant","126673920014299137"
+"microsoft","irrelevant","126673684936146944"
+"microsoft","irrelevant","126673474751172608"
+"microsoft","irrelevant","126673257175855106"
+"microsoft","irrelevant","126673062258147328"
+"microsoft","irrelevant","126671792877223936"
+"microsoft","irrelevant","126671141854134273"
+"microsoft","irrelevant","126671001357529089"
+"microsoft","irrelevant","126671000736763904"
+"microsoft","irrelevant","126670954112880640"
+"microsoft","irrelevant","126669765778485248"
+"microsoft","irrelevant","126669192157073408"
+"microsoft","irrelevant","126668863667572736"
+"microsoft","irrelevant","126668329002872833"
+"microsoft","irrelevant","126668278386012160"
+"microsoft","irrelevant","126666904764030977"
+"microsoft","irrelevant","126666281461096448"
+"microsoft","irrelevant","126666155376132097"
+"microsoft","irrelevant","126666022517350400"
+"microsoft","irrelevant","126665539719409664"
+"microsoft","irrelevant","126665086961065985"
+"microsoft","irrelevant","126665078861869056"
+"microsoft","irrelevant","126664812947181568"
+"microsoft","irrelevant","126664329335541760"
+"microsoft","irrelevant","126664120274653184"
+"microsoft","irrelevant","126663390302187521"
+"microsoft","irrelevant","126663150148911105"
+"microsoft","irrelevant","126662722673844224"
+"microsoft","irrelevant","126662658991718400"
+"microsoft","irrelevant","126662635033858049"
+"microsoft","irrelevant","126662553316245504"
+"microsoft","irrelevant","126662533347164161"
+"microsoft","irrelevant","126662436966236160"
+"microsoft","irrelevant","126661775922966528"
+"microsoft","irrelevant","126661415510614018"
+"microsoft","irrelevant","126660850051321858"
+"microsoft","irrelevant","126660622883631104"
+"microsoft","irrelevant","126660026013188097"
+"microsoft","irrelevant","126659873579610113"
+"microsoft","irrelevant","126659604628242432"
+"microsoft","irrelevant","126657946758295552"
+"microsoft","irrelevant","126657340920438785"
+"microsoft","irrelevant","126656806368968704"
+"microsoft","irrelevant","126656462566080513"
+"microsoft","irrelevant","126656050664443904"
+"microsoft","irrelevant","126655886893649921"
+"microsoft","irrelevant","126655535545204737"
+"microsoft","irrelevant","126655432361123840"
+"microsoft","irrelevant","126655411871940608"
+"microsoft","irrelevant","126654661322211328"
+"microsoft","irrelevant","126654651654340608"
+"microsoft","irrelevant","126654309894070273"
+"microsoft","irrelevant","126654232538521600"
+"microsoft","irrelevant","126653714357432320"
+"microsoft","irrelevant","126653276040073216"
+"microsoft","irrelevant","126652806546464768"
+"microsoft","irrelevant","126652711188963328"
+"microsoft","irrelevant","126652683040993280"
+"microsoft","irrelevant","126652553038540800"
+"microsoft","irrelevant","126652025181179904"
+"microsoft","irrelevant","126651878351183873"
+"microsoft","irrelevant","126651164325457920"
+"microsoft","irrelevant","126650231159922689"
+"microsoft","irrelevant","126650224625205248"
+"microsoft","irrelevant","126650108640100352"
+"microsoft","irrelevant","126650101304262656"
+"microsoft","irrelevant","126650034052792321"
+"microsoft","irrelevant","126649985897996288"
+"microsoft","irrelevant","126649791299063808"
+"microsoft","irrelevant","126649528924389378"
+"microsoft","irrelevant","126648588129419264"
+"microsoft","irrelevant","126648260352942080"
+"microsoft","irrelevant","126648259040120832"
+"microsoft","irrelevant","126648114886086657"
+"microsoft","irrelevant","126648049459142656"
+"microsoft","irrelevant","126647771821383682"
+"microsoft","irrelevant","126647390282326017"
+"microsoft","irrelevant","126646647856955392"
+"microsoft","irrelevant","126646439924334592"
+"microsoft","irrelevant","126646302032396289"
+"microsoft","irrelevant","126645036426334208"
+"microsoft","irrelevant","126644764383780865"
+"microsoft","irrelevant","126644360434565124"
+"microsoft","irrelevant","126643786557296640"
+"microsoft","irrelevant","126643010296487936"
+"microsoft","irrelevant","126642364667269120"
+"microsoft","irrelevant","126641501978632192"
+"microsoft","irrelevant","126640207167631361"
+"microsoft","irrelevant","126639750756040706"
+"microsoft","irrelevant","126638913145159681"
+"microsoft","irrelevant","126638752167759872"
+"microsoft","irrelevant","126638520034013184"
+"microsoft","irrelevant","126637475153186816"
+"microsoft","irrelevant","126637471676104704"
+"microsoft","irrelevant","126636564469121024"
+"microsoft","irrelevant","126636005922050048"
+"microsoft","irrelevant","126633706566856704"
+"microsoft","irrelevant","126632414830276608"
+"microsoft","irrelevant","126631816873517056"
+"microsoft","irrelevant","126630944999346176"
+"microsoft","irrelevant","126630702392426496"
+"microsoft","irrelevant","126630465246466048"
+"microsoft","irrelevant","126629195546755072"
+"microsoft","irrelevant","126628979636572160"
+"microsoft","irrelevant","126628891929493504"
+"microsoft","irrelevant","126628699402539008"
+"microsoft","irrelevant","126628570536742912"
+"microsoft","irrelevant","126626670181490688"
+"microsoft","irrelevant","126626576069693440"
+"microsoft","irrelevant","126626327888539648"
+"microsoft","irrelevant","126625929215754240"
+"microsoft","irrelevant","126625386565087232"
+"microsoft","irrelevant","126625317157744640"
+"microsoft","irrelevant","126625265928515584"
+"microsoft","irrelevant","126624831297949696"
+"microsoft","irrelevant","126623895334817792"
+"microsoft","irrelevant","126622818220785664"
+"microsoft","irrelevant","126622165595459584"
+"microsoft","irrelevant","126622031163822081"
+"microsoft","irrelevant","126622030006202368"
+"microsoft","irrelevant","126621969461415936"
+"microsoft","irrelevant","126621883411070976"
+"microsoft","irrelevant","126621712656760832"
+"microsoft","irrelevant","126621298272112643"
+"microsoft","irrelevant","126620982009008129"
+"microsoft","irrelevant","126620721236545536"
+"microsoft","irrelevant","126620532060848129"
+"microsoft","irrelevant","126619975518666752"
+"microsoft","irrelevant","126618143966756864"
+"microsoft","irrelevant","126618143098548224"
+"microsoft","irrelevant","126617262722531328"
+"microsoft","irrelevant","126616352340447233"
+"microsoft","irrelevant","126615874508558336"
+"microsoft","irrelevant","126615672351498240"
+"microsoft","irrelevant","126615378976718848"
+"microsoft","irrelevant","126615034007789569"
+"microsoft","irrelevant","126614513784074240"
+"microsoft","irrelevant","126614370150129664"
+"microsoft","irrelevant","126614145662599169"
+"microsoft","irrelevant","126614144299446272"
+"microsoft","irrelevant","126614136242180097"
+"microsoft","irrelevant","126613470245437440"
+"microsoft","irrelevant","126613334098325504"
+"microsoft","irrelevant","126612194594000896"
+"microsoft","irrelevant","126612152579657728"
+"microsoft","irrelevant","126611679961300993"
+"microsoft","irrelevant","126611107266834433"
+"microsoft","irrelevant","126610365852303361"
+"microsoft","irrelevant","126608861808431107"
+"microsoft","irrelevant","126607587406913536"
+"microsoft","irrelevant","126606198911930368"
+"microsoft","irrelevant","126606101671186432"
+"microsoft","irrelevant","126605924273111042"
+"microsoft","irrelevant","126605409590063104"
+"microsoft","irrelevant","126604075809771520"
+"microsoft","irrelevant","126601340242767872"
+"microsoft","irrelevant","126599691881299968"
+"microsoft","irrelevant","126599445168144384"
+"microsoft","irrelevant","126598693351723010"
+"microsoft","irrelevant","126598545062105088"
+"microsoft","irrelevant","126597416693665793"
+"microsoft","irrelevant","126596658929733632"
+"microsoft","irrelevant","126596412187226112"
+"microsoft","irrelevant","126596388615229441"
+"microsoft","irrelevant","126595424810307584"
+"microsoft","irrelevant","126592053055459328"
+"microsoft","irrelevant","126591243294748672"
+"microsoft","irrelevant","126590333520855040"
+"microsoft","irrelevant","126590035314229249"
+"microsoft","irrelevant","126589888266108929"
+"microsoft","irrelevant","126589139150839808"
+"microsoft","irrelevant","126589085304369152"
+"microsoft","irrelevant","126588920958955521"
+"microsoft","irrelevant","126588570961068032"
+"microsoft","irrelevant","126586819713310720"
+"microsoft","irrelevant","126586599772389376"
+"microsoft","irrelevant","126586563147743232"
+"microsoft","irrelevant","126585997814280192"
+"microsoft","irrelevant","126585826955104256"
+"microsoft","irrelevant","126585200355454976"
+"microsoft","irrelevant","126583539662733312"
+"microsoft","irrelevant","126583374096764928"
+"microsoft","irrelevant","126582476121444352"
+"microsoft","irrelevant","126581464052678656"
+"microsoft","irrelevant","126579540070907904"
+"microsoft","irrelevant","126579035093479425"
+"microsoft","irrelevant","126578736148652032"
+"microsoft","irrelevant","126577183060799488"
+"microsoft","irrelevant","126576827476094976"
+"microsoft","irrelevant","126576629395898368"
+"microsoft","irrelevant","126576294359072768"
+"microsoft","irrelevant","126575943706877953"
+"microsoft","irrelevant","126575853818744832"
+"microsoft","irrelevant","126575680585596928"
+"microsoft","irrelevant","126575637942120448"
+"microsoft","irrelevant","126575368692957184"
+"microsoft","irrelevant","126575205048000512"
+"microsoft","irrelevant","126574756307808256"
+"microsoft","irrelevant","126574310176468992"
+"microsoft","irrelevant","126573880285466625"
+"microsoft","irrelevant","126573790980358145"
+"microsoft","irrelevant","126573784961527808"
+"microsoft","irrelevant","126573378302783488"
+"microsoft","irrelevant","126572846272086016"
+"microsoft","irrelevant","126570427479896064"
+"microsoft","irrelevant","126569812120973312"
+"microsoft","irrelevant","126568272819793920"
+"microsoft","irrelevant","126566960979914752"
+"microsoft","irrelevant","126566043928895489"
+"microsoft","irrelevant","126565939075497984"
+"microsoft","irrelevant","126565570740101120"
+"microsoft","irrelevant","126565301801320448"
+"microsoft","irrelevant","126564330933194752"
+"microsoft","irrelevant","126564298209247232"
+"microsoft","irrelevant","126564244329218048"
+"microsoft","irrelevant","126563471662915584"
+"microsoft","irrelevant","126563328213516288"
+"microsoft","irrelevant","126561890825543680"
+"microsoft","irrelevant","126560733126328321"
+"microsoft","irrelevant","126560506097049600"
+"microsoft","irrelevant","126559394136723456"
+"microsoft","irrelevant","126558250194829312"
+"microsoft","irrelevant","126556805877858306"
+"microsoft","irrelevant","126556628173598720"
+"microsoft","irrelevant","126555304212176897"
+"microsoft","irrelevant","126553180711559169"
+"microsoft","irrelevant","126553028730953730"
+"microsoft","irrelevant","126550811894480896"
+"microsoft","irrelevant","126549243061207040"
+"microsoft","irrelevant","126547733359230976"
+"microsoft","irrelevant","126547233473691649"
+"microsoft","irrelevant","126546908142501888"
+"microsoft","irrelevant","126546297082748928"
+"microsoft","irrelevant","126546080384040960"
+"microsoft","irrelevant","126544902107570176"
+"microsoft","irrelevant","126544031365873664"
+"microsoft","irrelevant","126540569844523008"
+"microsoft","irrelevant","126539960890306560"
+"microsoft","irrelevant","126536951984689152"
+"microsoft","irrelevant","126532894272397312"
+"microsoft","irrelevant","126531176243539968"
+"microsoft","irrelevant","126530398317592576"
+"microsoft","irrelevant","126530235402424322"
+"microsoft","irrelevant","126527536313278465"
+"microsoft","irrelevant","126526431298723841"
+"microsoft","irrelevant","126523675364171778"
+"microsoft","irrelevant","126523554568224771"
+"microsoft","irrelevant","126518913294020608"
+"microsoft","irrelevant","126518577263153152"
+"microsoft","irrelevant","126506168184078336"
+"microsoft","irrelevant","126506057613848576"
+"microsoft","irrelevant","126505970317787136"
+"microsoft","irrelevant","126500518515310592"
+"microsoft","irrelevant","126500332078505985"
+"microsoft","irrelevant","126495306681548800"
+"microsoft","irrelevant","126491523020898304"
+"microsoft","irrelevant","126489614272827392"
+"microsoft","irrelevant","126488920329433088"
+"microsoft","irrelevant","126487014957785088"
+"microsoft","irrelevant","126485491238436866"
+"microsoft","irrelevant","126484927649820673"
+"twitter","positive","126883590041640960"
+"twitter","positive","126883448173510656"
+"twitter","positive","126883416280006656"
+"twitter","positive","126883364887203840"
+"twitter","positive","126883290782244864"
+"twitter","positive","126883211006590976"
+"twitter","positive","126883187300384768"
+"twitter","positive","126882971411165185"
+"twitter","positive","126882662932692992"
+"twitter","positive","126882408963391488"
+"twitter","positive","126881835463614464"
+"twitter","positive","126881080178507776"
+"twitter","positive","126880912754475008"
+"twitter","positive","126880559162077184"
+"twitter","positive","126880385605976064"
+"twitter","positive","126879785908580352"
+"twitter","positive","126879662851887104"
+"twitter","positive","126878670685085696"
+"twitter","positive","126877750131818497"
+"twitter","positive","126877362632667136"
+"twitter","positive","126877263311536128"
+"twitter","positive","126877209813188608"
+"twitter","positive","126877171926040576"
+"twitter","positive","126877056578486272"
+"twitter","positive","126876733113778176"
+"twitter","positive","126876654118240257"
+"twitter","positive","126876600083025920"
+"twitter","positive","126876125107462144"
+"twitter","positive","126876107881455616"
+"twitter","positive","126875611095502848"
+"twitter","positive","126875441217798144"
+"twitter","positive","126875378013843456"
+"twitter","positive","126874748469788672"
+"twitter","positive","126874346873556993"
+"twitter","positive","126873707066048513"
+"twitter","positive","126873518385274882"
+"twitter","positive","126873128348561409"
+"twitter","positive","126872791197814784"
+"twitter","positive","126872175490764802"
+"twitter","positive","126870551032643584"
+"twitter","positive","126870402751397889"
+"twitter","positive","126869964144644097"
+"twitter","positive","126869134238679042"
+"twitter","positive","126868475892338688"
+"twitter","positive","126868330098331648"
+"twitter","positive","126867350476697601"
+"twitter","positive","126865422174785536"
+"twitter","positive","126864056366804992"
+"twitter","positive","126863862791282688"
+"twitter","positive","126863821594832897"
+"twitter","positive","126863766334873600"
+"twitter","positive","126863084433326080"
+"twitter","positive","126862820578050048"
+"twitter","positive","126862735953768448"
+"twitter","positive","126862443275235328"
+"twitter","positive","126862309497905152"
+"twitter","positive","126862124201947136"
+"twitter","positive","126861364227608577"
+"twitter","positive","126860944352612353"
+"twitter","positive","126860415085973504"
+"twitter","positive","126860046981279744"
+"twitter","positive","126859858443112449"
+"twitter","positive","126859371094360064"
+"twitter","positive","126858606695030784"
+"twitter","positive","126858477942476800"
+"twitter","positive","126858393909608448"
+"twitter","positive","126857095088840706"
+"twitter","positive","126857082199744513"
+"twitter","negative","126883562652844033"
+"twitter","negative","126883300227817472"
+"twitter","negative","126883243726344193"
+"twitter","negative","126882964582838272"
+"twitter","negative","126882934568390656"
+"twitter","negative","126882761733705728"
+"twitter","negative","126881698783834112"
+"twitter","negative","126881658854064128"
+"twitter","negative","126881376074076161"
+"twitter","negative","126881010301419520"
+"twitter","negative","126880978185625600"
+"twitter","negative","126880813991202816"
+"twitter","negative","126879988602519552"
+"twitter","negative","126879964619485185"
+"twitter","negative","126878518310223874"
+"twitter","negative","126878448575717376"
+"twitter","negative","126878307693244417"
+"twitter","negative","126877679826894849"
+"twitter","negative","126877484271665152"
+"twitter","negative","126877335399051264"
+"twitter","negative","126877245347348480"
+"twitter","negative","126877135926337537"
+"twitter","negative","126876956443688960"
+"twitter","negative","126876682207502336"
+"twitter","negative","126876493153452032"
+"twitter","negative","126876140269862912"
+"twitter","negative","126876046028050432"
+"twitter","negative","126875653210521600"
+"twitter","negative","126874389210861568"
+"twitter","negative","126873912624693249"
+"twitter","negative","126873860745330689"
+"twitter","negative","126872684658294784"
+"twitter","negative","126872492118769664"
+"twitter","negative","126872325663621120"
+"twitter","negative","126872316142559232"
+"twitter","negative","126872265328562176"
+"twitter","negative","126871878886363136"
+"twitter","negative","126871286545788928"
+"twitter","negative","126870923591692288"
+"twitter","negative","126870920018137088"
+"twitter","negative","126870745258266626"
+"twitter","negative","126870618825179136"
+"twitter","negative","126870358816067584"
+"twitter","negative","126870162510057473"
+"twitter","negative","126869855621218304"
+"twitter","negative","126869842769870848"
+"twitter","negative","126869706639544320"
+"twitter","negative","126869466054275073"
+"twitter","negative","126869063023607808"
+"twitter","negative","126869039233511425"
+"twitter","negative","126868993263943682"
+"twitter","negative","126868622026080256"
+"twitter","negative","126868404182319105"
+"twitter","negative","126867983455879168"
+"twitter","negative","126867876115259394"
+"twitter","negative","126867543955738624"
+"twitter","negative","126867446220062720"
+"twitter","negative","126867188677218304"
+"twitter","negative","126866560756363264"
+"twitter","negative","126866493370679297"
+"twitter","negative","126866003094290434"
+"twitter","negative","126865431142219776"
+"twitter","negative","126864648577351681"
+"twitter","negative","126864610446942209"
+"twitter","negative","126864575508381696"
+"twitter","negative","126864510194683904"
+"twitter","negative","126864203557507072"
+"twitter","negative","126864059206336513"
+"twitter","negative","126863505851809793"
+"twitter","negative","126862065649459200"
+"twitter","negative","126861410864087042"
+"twitter","negative","126861382078578688"
+"twitter","negative","126861309185761280"
+"twitter","negative","126861228797722624"
+"twitter","negative","126859898897174528"
+"twitter","negative","126858852976181250"
+"twitter","negative","126858607789740032"
+"twitter","negative","126857136855719936"
+"twitter","neutral","126883719368818688"
+"twitter","neutral","126883711131201536"
+"twitter","neutral","126883672682004480"
+"twitter","neutral","126883640671076352"
+"twitter","neutral","126883630369882112"
+"twitter","neutral","126883597239066625"
+"twitter","neutral","126883517706674176"
+"twitter","neutral","126883457614884864"
+"twitter","neutral","126883452820783104"
+"twitter","neutral","126883438954422274"
+"twitter","neutral","126883335875203072"
+"twitter","neutral","126883224587739136"
+"twitter","neutral","126883185396170752"
+"twitter","neutral","126883158942695425"
+"twitter","neutral","126883124595527681"
+"twitter","neutral","126883013236752384"
+"twitter","neutral","126883005263392768"
+"twitter","neutral","126882970811379712"
+"twitter","neutral","126882885553758208"
+"twitter","neutral","126882832319651840"
+"twitter","neutral","126882800585539585"
+"twitter","neutral","126882730154803200"
+"twitter","neutral","126882726061146112"
+"twitter","neutral","126882653893967872"
+"twitter","neutral","126882617843924992"
+"twitter","neutral","126882559522111488"
+"twitter","neutral","126882542610690049"
+"twitter","neutral","126882518799626241"
+"twitter","neutral","126882493059170304"
+"twitter","neutral","126882453943103488"
+"twitter","neutral","126882427661582336"
+"twitter","neutral","126882349588815873"
+"twitter","neutral","126882248644493312"
+"twitter","neutral","126882244982878208"
+"twitter","neutral","126882193967550464"
+"twitter","neutral","126882122077184000"
+"twitter","neutral","126882090343079937"
+"twitter","neutral","126882054259474432"
+"twitter","neutral","126881996629753856"
+"twitter","neutral","126881960416120832"
+"twitter","neutral","126881887049351168"
+"twitter","neutral","126881736364785664"
+"twitter","neutral","126881682266652672"
+"twitter","neutral","126881626583076864"
+"twitter","neutral","126881596086288385"
+"twitter","neutral","126881589606105089"
+"twitter","neutral","126881580521234432"
+"twitter","neutral","126881523755528192"
+"twitter","neutral","126881380503273472"
+"twitter","neutral","126881317894893568"
+"twitter","neutral","126881309015539712"
+"twitter","neutral","126881203642040320"
+"twitter","neutral","126881167541665792"
+"twitter","neutral","126881136398962688"
+"twitter","neutral","126881090446163968"
+"twitter","neutral","126881073366958080"
+"twitter","neutral","126881072167399425"
+"twitter","neutral","126880978273697792"
+"twitter","neutral","126880926268526592"
+"twitter","neutral","126880883822166017"
+"twitter","neutral","126880854361391104"
+"twitter","neutral","126880672190185472"
+"twitter","neutral","126880656352481280"
+"twitter","neutral","126880621170659328"
+"twitter","neutral","126880571233280000"
+"twitter","neutral","126880518410215425"
+"twitter","neutral","126880481361920000"
+"twitter","neutral","126880429256093696"
+"twitter","neutral","126880399912742912"
+"twitter","neutral","126880329913995264"
+"twitter","neutral","126880253145657344"
+"twitter","neutral","126880223433195520"
+"twitter","neutral","126880178705141762"
+"twitter","neutral","126880108718989313"
+"twitter","neutral","126880102226206720"
+"twitter","neutral","126880068252336128"
+"twitter","neutral","126880057141641216"
+"twitter","neutral","126880036488880128"
+"twitter","neutral","126880035507412992"
+"twitter","neutral","126880030486822912"
+"twitter","neutral","126880007741128704"
+"twitter","neutral","126879867731062784"
+"twitter","neutral","126879768481247232"
+"twitter","neutral","126879759866142720"
+"twitter","neutral","126879759316697088"
+"twitter","neutral","126879729189011457"
+"twitter","neutral","126879677737480192"
+"twitter","neutral","126879608120418305"
+"twitter","neutral","126879548687130624"
+"twitter","neutral","126879506521792513"
+"twitter","neutral","126879491124506624"
+"twitter","neutral","126879484736585729"
+"twitter","neutral","126879463450488832"
+"twitter","neutral","126879462041206784"
+"twitter","neutral","126879413013987328"
+"twitter","neutral","126879328712658944"
+"twitter","neutral","126879257975734272"
+"twitter","neutral","126879221724356608"
+"twitter","neutral","126879149003509760"
+"twitter","neutral","126879122298372097"
+"twitter","neutral","126879106347433984"
+"twitter","neutral","126879073220829184"
+"twitter","neutral","126879068800024576"
+"twitter","neutral","126879028706672640"
+"twitter","neutral","126879022188724224"
+"twitter","neutral","126878948561924096"
+"twitter","neutral","126878824574095360"
+"twitter","neutral","126878811009728513"
+"twitter","neutral","126878688682835968"
+"twitter","neutral","126878654927077376"
+"twitter","neutral","126878622509309952"
+"twitter","neutral","126878620953231360"
+"twitter","neutral","126878545338310656"
+"twitter","neutral","126878539487252480"
+"twitter","neutral","126878426312351744"
+"twitter","neutral","126878409023426560"
+"twitter","neutral","126878311497474048"
+"twitter","neutral","126878176063389696"
+"twitter","neutral","126878130353876992"
+"twitter","neutral","126878062846554114"
+"twitter","neutral","126878058278952960"
+"twitter","neutral","126877998115852288"
+"twitter","neutral","126877997717405697"
+"twitter","neutral","126877971637211136"
+"twitter","neutral","126877965064740864"
+"twitter","neutral","126877936514105344"
+"twitter","neutral","126877900610875393"
+"twitter","neutral","126877869547855872"
+"twitter","neutral","126877729856557056"
+"twitter","neutral","126877666040225792"
+"twitter","neutral","126877629600108544"
+"twitter","neutral","126877612214726657"
+"twitter","neutral","126877585488609282"
+"twitter","neutral","126877557550354433"
+"twitter","neutral","126877527674322944"
+"twitter","neutral","126877467507040257"
+"twitter","neutral","126877441359757313"
+"twitter","neutral","126877420178522113"
+"twitter","neutral","126877416630136832"
+"twitter","neutral","126877186689998848"
+"twitter","neutral","126877128204627970"
+"twitter","neutral","126877032704507904"
+"twitter","neutral","126876964287033344"
+"twitter","neutral","126876948206075904"
+"twitter","neutral","126876905696796672"
+"twitter","neutral","126876843272974337"
+"twitter","neutral","126876834548822017"
+"twitter","neutral","126876823723315200"
+"twitter","neutral","126876778806509568"
+"twitter","neutral","126876760062181376"
+"twitter","neutral","126876750788567040"
+"twitter","neutral","126876736137871361"
+"twitter","neutral","126876733877133312"
+"twitter","neutral","126876684925415425"
+"twitter","neutral","126876682928926720"
+"twitter","neutral","126876676822007809"
+"twitter","neutral","126876593531518976"
+"twitter","neutral","126876559977095168"
+"twitter","neutral","126876501755957249"
+"twitter","neutral","126876490364223488"
+"twitter","neutral","126876467631104000"
+"twitter","neutral","126876452762296321"
+"twitter","neutral","126876394339831808"
+"twitter","neutral","126876099786444800"
+"twitter","neutral","126876078592638976"
+"twitter","neutral","126876009797656576"
+"twitter","neutral","126876004730933250"
+"twitter","neutral","126875958929145856"
+"twitter","neutral","126875958694260736"
+"twitter","neutral","126875943284379649"
+"twitter","neutral","126875894177480705"
+"twitter","neutral","126875893552525312"
+"twitter","neutral","126875887093293056"
+"twitter","neutral","126875872165769216"
+"twitter","neutral","126875805505691648"
+"twitter","neutral","126875787818319872"
+"twitter","neutral","126875761469689856"
+"twitter","neutral","126875734005399552"
+"twitter","neutral","126875692855078912"
+"twitter","neutral","126875612341207040"
+"twitter","neutral","126875579172663296"
+"twitter","neutral","126875562663874560"
+"twitter","neutral","126875457265205248"
+"twitter","neutral","126875416760815616"
+"twitter","neutral","126875301157404672"
+"twitter","neutral","126875280018112512"
+"twitter","neutral","126875274901065728"
+"twitter","neutral","126875267674284032"
+"twitter","neutral","126875204772311040"
+"twitter","neutral","126875183725297664"
+"twitter","neutral","126875160623058944"
+"twitter","neutral","126875159800987649"
+"twitter","neutral","126875080084033536"
+"twitter","neutral","126875059477426176"
+"twitter","neutral","126875035817349120"
+"twitter","neutral","126874978674151424"
+"twitter","neutral","126874943341330432"
+"twitter","neutral","126874922017488896"
+"twitter","neutral","126874909933711361"
+"twitter","neutral","126874886378496000"
+"twitter","neutral","126874883207610368"
+"twitter","neutral","126874847614734336"
+"twitter","neutral","126874811296251904"
+"twitter","neutral","126874799099219969"
+"twitter","neutral","126874783756455936"
+"twitter","neutral","126874662268452864"
+"twitter","neutral","126874568655777793"
+"twitter","neutral","126874543770976256"
+"twitter","neutral","126874532555399168"
+"twitter","neutral","126874503455318016"
+"twitter","neutral","126874467824697344"
+"twitter","neutral","126874421829971968"
+"twitter","neutral","126874417824399360"
+"twitter","neutral","126874384139948032"
+"twitter","neutral","126874374077816832"
+"twitter","neutral","126874332176711681"
+"twitter","neutral","126874244629016576"
+"twitter","neutral","126874228296396800"
+"twitter","neutral","126874200253276160"
+"twitter","neutral","126874174722539520"
+"twitter","neutral","126874159606267904"
+"twitter","neutral","126874138836074497"
+"twitter","neutral","126874058062184448"
+"twitter","neutral","126874038164393984"
+"twitter","neutral","126873977284079616"
+"twitter","neutral","126873903552405504"
+"twitter","neutral","126873883172274176"
+"twitter","neutral","126873866575425536"
+"twitter","neutral","126873786715873280"
+"twitter","neutral","126873749558530049"
+"twitter","neutral","126873686987902976"
+"twitter","neutral","126873680654516224"
+"twitter","neutral","126873665601146882"
+"twitter","neutral","126873546017357825"
+"twitter","neutral","126873484746952705"
+"twitter","neutral","126873420192423936"
+"twitter","neutral","126873418846044160"
+"twitter","neutral","126873366304010240"
+"twitter","neutral","126873364173299712"
+"twitter","neutral","126873323014590464"
+"twitter","neutral","126873190420062208"
+"twitter","neutral","126873153015263233"
+"twitter","neutral","126873112389226497"
+"twitter","neutral","126873062439260160"
+"twitter","neutral","126873003920330752"
+"twitter","neutral","126873001198239744"
+"twitter","neutral","126872982315474945"
+"twitter","neutral","126872958068207616"
+"twitter","neutral","126872890007236608"
+"twitter","neutral","126872886748266496"
+"twitter","neutral","126872872294694912"
+"twitter","neutral","126872871254491137"
+"twitter","neutral","126872821375827968"
+"twitter","neutral","126872777130123264"
+"twitter","neutral","126872773925679105"
+"twitter","neutral","126872528235921409"
+"twitter","neutral","126872511492263937"
+"twitter","neutral","126872465635938304"
+"twitter","neutral","126872432517709825"
+"twitter","neutral","126872371901640705"
+"twitter","neutral","126872360052736000"
+"twitter","neutral","126872357099945984"
+"twitter","neutral","126872338158452736"
+"twitter","neutral","126872320043257857"
+"twitter","neutral","126872299709280256"
+"twitter","neutral","126872290968338432"
+"twitter","neutral","126872267257950209"
+"twitter","neutral","126872221292576768"
+"twitter","neutral","126872201663229952"
+"twitter","neutral","126872199838699520"
+"twitter","neutral","126872156763201536"
+"twitter","neutral","126872143081390080"
+"twitter","neutral","126872087414583296"
+"twitter","neutral","126872051242893312"
+"twitter","neutral","126872004652580864"
+"twitter","neutral","126871971827953664"
+"twitter","neutral","126871955419836417"
+"twitter","neutral","126871924059013120"
+"twitter","neutral","126871857277308930"
+"twitter","neutral","126871758757306368"
+"twitter","neutral","126871747759837185"
+"twitter","neutral","126871669871620096"
+"twitter","neutral","126871663299137536"
+"twitter","neutral","126871626133417985"
+"twitter","neutral","126871606952853504"
+"twitter","neutral","126871562707144704"
+"twitter","neutral","126871562098982912"
+"twitter","neutral","126871520785072129"
+"twitter","neutral","126871511326924800"
+"twitter","neutral","126871458998788096"
+"twitter","neutral","126871458726162432"
+"twitter","neutral","126871437322629120"
+"twitter","neutral","126871411431194624"
+"twitter","neutral","126871372164120576"
+"twitter","neutral","126871299741069312"
+"twitter","neutral","126871291998371840"
+"twitter","neutral","126871201778905088"
+"twitter","neutral","126871143431938048"
+"twitter","neutral","126871134439346177"
+"twitter","neutral","126871086863355905"
+"twitter","neutral","126871066760065024"
+"twitter","neutral","126871041225134080"
+"twitter","neutral","126871013937000450"
+"twitter","neutral","126871003132465152"
+"twitter","neutral","126870943489466368"
+"twitter","neutral","126870941992091648"
+"twitter","neutral","126870940570226688"
+"twitter","neutral","126870931040768000"
+"twitter","neutral","126870883481567232"
+"twitter","neutral","126870815877771264"
+"twitter","neutral","126870807191363584"
+"twitter","neutral","126870792960086018"
+"twitter","neutral","126870788564467713"
+"twitter","neutral","126870756926816256"
+"twitter","neutral","126870706943295489"
+"twitter","neutral","126870699108339712"
+"twitter","neutral","126870669030998016"
+"twitter","neutral","126870651112927232"
+"twitter","neutral","126870623086592000"
+"twitter","neutral","126870593764208640"
+"twitter","neutral","126870550546096128"
+"twitter","neutral","126870454299398144"
+"twitter","neutral","126870356563722240"
+"twitter","neutral","126870354722439169"
+"twitter","neutral","126870255703306241"
+"twitter","neutral","126870119363260416"
+"twitter","neutral","126870091341115392"
+"twitter","neutral","126869939536674816"
+"twitter","neutral","126869876588552192"
+"twitter","neutral","126869850751631361"
+"twitter","neutral","126869816584839168"
+"twitter","neutral","126869769608642560"
+"twitter","neutral","126869765049434112"
+"twitter","neutral","126869751476654080"
+"twitter","neutral","126869749098487809"
+"twitter","neutral","126869742068842496"
+"twitter","neutral","126869644534489088"
+"twitter","neutral","126869604348862464"
+"twitter","neutral","126869604214652929"
+"twitter","neutral","126869575202643968"
+"twitter","neutral","126869488007262208"
+"twitter","neutral","126869448069095424"
+"twitter","neutral","126869378452037632"
+"twitter","neutral","126869302887464960"
+"twitter","neutral","126869296994451457"
+"twitter","neutral","126869197413285888"
+"twitter","neutral","126869175053467649"
+"twitter","neutral","126868990730575872"
+"twitter","neutral","126868950637219840"
+"twitter","neutral","126868911361757185"
+"twitter","neutral","126868888074989568"
+"twitter","neutral","126868868512743425"
+"twitter","neutral","126868831493820416"
+"twitter","neutral","126868828457144321"
+"twitter","neutral","126868783431303168"
+"twitter","neutral","126868754033426434"
+"twitter","neutral","126868751533617152"
+"twitter","neutral","126868633195524096"
+"twitter","neutral","126868627109584896"
+"twitter","neutral","126868570226425856"
+"twitter","neutral","126868533991849988"
+"twitter","neutral","126868465377226752"
+"twitter","neutral","126868429864046592"
+"twitter","neutral","126868429796933632"
+"twitter","neutral","126868397282689025"
+"twitter","neutral","126868271625539585"
+"twitter","neutral","126868239203573760"
+"twitter","neutral","126868233084080130"
+"twitter","neutral","126868090687459328"
+"twitter","neutral","126868017106780160"
+"twitter","neutral","126868008902737920"
+"twitter","neutral","126867948471189504"
+"twitter","neutral","126867947418427393"
+"twitter","neutral","126867879382614016"
+"twitter","neutral","126867869236604928"
+"twitter","neutral","126867793089015808"
+"twitter","neutral","126867786680111104"
+"twitter","neutral","126867774738927617"
+"twitter","neutral","126867767914799104"
+"twitter","neutral","126867760964841473"
+"twitter","neutral","126867711807598592"
+"twitter","neutral","126867680966881280"
+"twitter","neutral","126867577925406721"
+"twitter","neutral","126867506127310848"
+"twitter","neutral","126867487659786240"
+"twitter","neutral","126867439177842688"
+"twitter","neutral","126867417401012225"
+"twitter","neutral","126867400250490880"
+"twitter","neutral","126867371364319232"
+"twitter","neutral","126867320005066752"
+"twitter","neutral","126867304549072896"
+"twitter","neutral","126867275113447424"
+"twitter","neutral","126867260106227713"
+"twitter","neutral","126867201776037888"
+"twitter","neutral","126867198642884608"
+"twitter","neutral","126867164253798400"
+"twitter","neutral","126867147673714688"
+"twitter","neutral","126867119592837121"
+"twitter","neutral","126867067776405506"
+"twitter","neutral","126867012336095232"
+"twitter","neutral","126866981377941504"
+"twitter","neutral","126866968912478208"
+"twitter","neutral","126866968753086464"
+"twitter","neutral","126866963887689728"
+"twitter","neutral","126866861521502208"
+"twitter","neutral","126866854240202753"
+"twitter","neutral","126866804575440897"
+"twitter","neutral","126866628859281408"
+"twitter","neutral","126866575700664320"
+"twitter","neutral","126866562111123456"
+"twitter","neutral","126866557325426688"
+"twitter","neutral","126866520486846465"
+"twitter","neutral","126866446323171328"
+"twitter","neutral","126866438517567488"
+"twitter","neutral","126866422012981248"
+"twitter","neutral","126866421719384065"
+"twitter","neutral","126866413053939712"
+"twitter","neutral","126866411275554816"
+"twitter","neutral","126866376899035136"
+"twitter","neutral","126866353561927680"
+"twitter","neutral","126866303138013184"
+"twitter","neutral","126866277213016064"
+"twitter","neutral","126866234712145920"
+"twitter","neutral","126866190193790976"
+"twitter","neutral","126866187790450688"
+"twitter","neutral","126866185764601856"
+"twitter","neutral","126866177778655233"
+"twitter","neutral","126866083708801024"
+"twitter","neutral","126866034048241664"
+"twitter","neutral","126865968021516288"
+"twitter","neutral","126865954645884928"
+"twitter","neutral","126865903521505280"
+"twitter","neutral","126865888724004864"
+"twitter","neutral","126865881069391872"
+"twitter","neutral","126865879848853505"
+"twitter","neutral","126865837800951808"
+"twitter","neutral","126865802434580480"
+"twitter","neutral","126865779349127170"
+"twitter","neutral","126865691923062784"
+"twitter","neutral","126865511878361090"
+"twitter","neutral","126865453221027843"
+"twitter","neutral","126865436590604288"
+"twitter","neutral","126865419830177794"
+"twitter","neutral","126865247465254912"
+"twitter","neutral","126865215915687936"
+"twitter","neutral","126865145812107264"
+"twitter","neutral","126865121938116608"
+"twitter","neutral","126865091026100224"
+"twitter","neutral","126865038479867904"
+"twitter","neutral","126865038085591041"
+"twitter","neutral","126865005009309696"
+"twitter","neutral","126864987078660097"
+"twitter","neutral","126864979432456193"
+"twitter","neutral","126864974097293312"
+"twitter","neutral","126864886402777088"
+"twitter","neutral","126864870032408576"
+"twitter","neutral","126864861576704000"
+"twitter","neutral","126864793373122560"
+"twitter","neutral","126864745587412992"
+"twitter","neutral","126864657817407490"
+"twitter","neutral","126864641174417408"
+"twitter","neutral","126864610958647296"
+"twitter","neutral","126864491890749440"
+"twitter","neutral","126864475247742977"
+"twitter","neutral","126864423037046784"
+"twitter","neutral","126864404196241408"
+"twitter","neutral","126864301293182977"
+"twitter","neutral","126864271501041664"
+"twitter","neutral","126864244716208129"
+"twitter","neutral","126864237200023553"
+"twitter","neutral","126864231718076417"
+"twitter","neutral","126864167343894529"
+"twitter","neutral","126864141561507840"
+"twitter","neutral","126864136226357248"
+"twitter","neutral","126864131289661441"
+"twitter","neutral","126864105347878912"
+"twitter","neutral","126864096388849665"
+"twitter","neutral","126864070249947136"
+"twitter","neutral","126864043154751489"
+"twitter","neutral","126864007784185856"
+"twitter","neutral","126863975223795712"
+"twitter","neutral","126863972778508289"
+"twitter","neutral","126863957767110656"
+"twitter","neutral","126863949584023552"
+"twitter","neutral","126863942634057728"
+"twitter","neutral","126863921465393152"
+"twitter","neutral","126863918646820864"
+"twitter","neutral","126863876066254848"
+"twitter","neutral","126863870689165312"
+"twitter","neutral","126863814619709441"
+"twitter","neutral","126863776476708864"
+"twitter","neutral","126863772877996034"
+"twitter","neutral","126863571912114177"
+"twitter","neutral","126863525481156608"
+"twitter","neutral","126863470397366272"
+"twitter","neutral","126863457642483712"
+"twitter","neutral","126863410750160896"
+"twitter","neutral","126863409680625664"
+"twitter","neutral","126863392764989440"
+"twitter","neutral","126863275450310656"
+"twitter","neutral","126863240041996289"
+"twitter","neutral","126863216046374912"
+"twitter","neutral","126863212762247168"
+"twitter","neutral","126863206193963008"
+"twitter","neutral","126863190691811328"
+"twitter","neutral","126863104280760320"
+"twitter","neutral","126863072269840384"
+"twitter","neutral","126863060794224640"
+"twitter","neutral","126862999720951808"
+"twitter","neutral","126862947346694144"
+"twitter","neutral","126862946310692864"
+"twitter","neutral","126862939159412740"
+"twitter","neutral","126862902325022720"
+"twitter","neutral","126862899804246016"
+"twitter","neutral","126862897639993344"
+"twitter","neutral","126862892128677888"
+"twitter","neutral","126862842052874240"
+"twitter","neutral","126862821635002368"
+"twitter","neutral","126862767520096257"
+"twitter","neutral","126862734863241217"
+"twitter","neutral","126862728181714944"
+"twitter","neutral","126862726311059457"
+"twitter","neutral","126862714957078528"
+"twitter","neutral","126862636922044417"
+"twitter","neutral","126862618836221954"
+"twitter","neutral","126862573697114112"
+"twitter","neutral","126862560870940673"
+"twitter","neutral","126862552025137152"
+"twitter","neutral","126862547214286848"
+"twitter","neutral","126862517216612354"
+"twitter","neutral","126862494437351425"
+"twitter","neutral","126862407309082625"
+"twitter","neutral","126862391924375552"
+"twitter","neutral","126862244427468800"
+"twitter","neutral","126862244075151361"
+"twitter","neutral","126862174277738496"
+"twitter","neutral","126862170502860800"
+"twitter","neutral","126862130136879104"
+"twitter","neutral","126862106443255810"
+"twitter","neutral","126862039225352192"
+"twitter","neutral","126862030979334144"
+"twitter","neutral","126862000981671938"
+"twitter","neutral","126861997127110657"
+"twitter","neutral","126861952369700865"
+"twitter","neutral","126861941372239872"
+"twitter","neutral","126861916286103552"
+"twitter","neutral","126861895436206081"
+"twitter","neutral","126861880194109440"
+"twitter","neutral","126861879598530561"
+"twitter","neutral","126861823797493760"
+"twitter","neutral","126861820169437184"
+"twitter","neutral","126861776133431296"
+"twitter","neutral","126861734144245760"
+"twitter","neutral","126861727374643200"
+"twitter","neutral","126861715752222720"
+"twitter","neutral","126861701168631808"
+"twitter","neutral","126861637931118592"
+"twitter","neutral","126861630695940096"
+"twitter","neutral","126861453209767936"
+"twitter","neutral","126861421408567296"
+"twitter","neutral","126861418078277632"
+"twitter","neutral","126861321995173890"
+"twitter","neutral","126861294312759296"
+"twitter","neutral","126861285307584514"
+"twitter","neutral","126861195058757634"
+"twitter","neutral","126861181108498432"
+"twitter","neutral","126861149726715904"
+"twitter","neutral","126861036803473408"
+"twitter","neutral","126861011813810176"
+"twitter","neutral","126860964992794624"
+"twitter","neutral","126860955605934081"
+"twitter","neutral","126860933988483073"
+"twitter","neutral","126860932881186817"
+"twitter","neutral","126860898567589888"
+"twitter","neutral","126860835560755200"
+"twitter","neutral","126860802392195072"
+"twitter","neutral","126860800978722816"
+"twitter","neutral","126860744913469440"
+"twitter","neutral","126860714118885376"
+"twitter","neutral","126860700915208193"
+"twitter","neutral","126860691255721984"
+"twitter","neutral","126860597013917697"
+"twitter","neutral","126860563740495872"
+"twitter","neutral","126860548359995393"
+"twitter","neutral","126860527497515008"
+"twitter","neutral","126860504240107520"
+"twitter","neutral","126860495079735296"
+"twitter","neutral","126860492261167104"
+"twitter","neutral","126860390356357121"
+"twitter","neutral","126860373117775872"
+"twitter","neutral","126860341941518336"
+"twitter","neutral","126860267178049536"
+"twitter","neutral","126860173053669376"
+"twitter","neutral","126860038525562880"
+"twitter","neutral","126859978941276161"
+"twitter","neutral","126859887404777472"
+"twitter","neutral","126859856782163968"
+"twitter","neutral","126859833088552960"
+"twitter","neutral","126859789883015168"
+"twitter","neutral","126859782601703424"
+"twitter","neutral","126859745154957312"
+"twitter","neutral","126859710740701185"
+"twitter","neutral","126859623671152640"
+"twitter","neutral","126859623532732417"
+"twitter","neutral","126859604985511937"
+"twitter","neutral","126859530305929216"
+"twitter","neutral","126859485322035200"
+"twitter","neutral","126859432511537152"
+"twitter","neutral","126859340622725120"
+"twitter","neutral","126859326294999041"
+"twitter","neutral","126859246213136384"
+"twitter","neutral","126859124016300032"
+"twitter","neutral","126859115657043968"
+"twitter","neutral","126859053757501440"
+"twitter","neutral","126859044756520960"
+"twitter","neutral","126858999512580096"
+"twitter","neutral","126858961159864320"
+"twitter","neutral","126858958894931968"
+"twitter","neutral","126858953673027584"
+"twitter","neutral","126858718762639360"
+"twitter","neutral","126858698520932352"
+"twitter","neutral","126858281867149312"
+"twitter","neutral","126858276339056640"
+"twitter","neutral","126858233032871937"
+"twitter","neutral","126858194046816256"
+"twitter","neutral","126858149859831808"
+"twitter","neutral","126857916631355393"
+"twitter","neutral","126857800411398144"
+"twitter","neutral","126857676134166528"
+"twitter","neutral","126857481006751744"
+"twitter","neutral","126857475034071040"
+"twitter","neutral","126857044677505024"
+"twitter","neutral","126856873738633216"
+"twitter","neutral","126856857527648256"
+"twitter","neutral","126856848778342402"
+"twitter","neutral","126856541453291520"
+"twitter","neutral","126856421907243009"
+"twitter","neutral","126856387211960320"
+"twitter","neutral","126856150980374528"
+"twitter","neutral","126856031367204865"
+"twitter","neutral","126855856414404608"
+"twitter","neutral","126855838047547392"
+"twitter","neutral","126855191571070976"
+"twitter","neutral","126854358817181696"
+"twitter","neutral","126853913591808002"
+"twitter","neutral","126853667738497025"
+"twitter","neutral","126853298996252674"
+"twitter","irrelevant","126883777938067457"
+"twitter","irrelevant","126883741481177088"
+"twitter","irrelevant","126883583691472896"
+"twitter","irrelevant","126883512073719808"
+"twitter","irrelevant","126883431308197888"
+"twitter","irrelevant","126883226760384512"
+"twitter","irrelevant","126883122519343105"
+"twitter","irrelevant","126883074888826880"
+"twitter","irrelevant","126882987244662784"
+"twitter","irrelevant","126882954629742592"
+"twitter","irrelevant","126882787029553153"
+"twitter","irrelevant","126882743819833345"
+"twitter","irrelevant","126882703000879105"
+"twitter","irrelevant","126882629365661696"
+"twitter","irrelevant","126882613569912832"
+"twitter","irrelevant","126882562202271744"
+"twitter","irrelevant","126882507621797889"
+"twitter","irrelevant","126882498536939521"
+"twitter","irrelevant","126882470703529985"
+"twitter","irrelevant","126882436930994176"
+"twitter","irrelevant","126882291757752320"
+"twitter","irrelevant","126882264360558592"
+"twitter","irrelevant","126882080050262016"
+"twitter","irrelevant","126881828337483776"
+"twitter","irrelevant","126881827339243521"
+"twitter","irrelevant","126881659516755969"
+"twitter","irrelevant","126881629145808896"
+"twitter","irrelevant","126881619335327744"
+"twitter","irrelevant","126881591627759616"
+"twitter","irrelevant","126881518151929856"
+"twitter","irrelevant","126881495481724931"
+"twitter","irrelevant","126881485264400385"
+"twitter","irrelevant","126881462334144513"
+"twitter","irrelevant","126881427550773248"
+"twitter","irrelevant","126881398316466178"
+"twitter","irrelevant","126881392956153856"
+"twitter","irrelevant","126881358848065536"
+"twitter","irrelevant","126881232997974016"
+"twitter","irrelevant","126881227729928193"
+"twitter","irrelevant","126881216468226048"
+"twitter","irrelevant","126881169169063937"
+"twitter","irrelevant","126881114936717313"
+"twitter","irrelevant","126881095378665473"
+"twitter","irrelevant","126881035748261888"
+"twitter","irrelevant","126881008002940928"
+"twitter","irrelevant","126880978324037632"
+"twitter","irrelevant","126880976096858112"
+"twitter","irrelevant","126880901903826945"
+"twitter","irrelevant","126880815928975360"
+"twitter","irrelevant","126880805610987520"
+"twitter","irrelevant","126880734152634368"
+"twitter","irrelevant","126880709427208192"
+"twitter","irrelevant","126880705996259328"
+"twitter","irrelevant","126880699587371008"
+"twitter","irrelevant","126880644105121792"
+"twitter","irrelevant","126880580485910529"
+"twitter","irrelevant","126880556775522304"
+"twitter","irrelevant","126880519391686656"
+"twitter","irrelevant","126880484797063168"
+"twitter","irrelevant","126880477943570433"
+"twitter","irrelevant","126880468833550336"
+"twitter","irrelevant","126880436906491904"
+"twitter","irrelevant","126880353817337856"
+"twitter","irrelevant","126880217124974594"
+"twitter","irrelevant","126880194588975104"
+"twitter","irrelevant","126880098401001473"
+"twitter","irrelevant","126880095334973440"
+"twitter","irrelevant","126880015357984768"
+"twitter","irrelevant","126879958529343488"
+"twitter","irrelevant","126879710054580224"
+"twitter","irrelevant","126879705046597632"
+"twitter","irrelevant","126879692635635712"
+"twitter","irrelevant","126879567385337856"
+"twitter","irrelevant","126879538415271936"
+"twitter","irrelevant","126879417220874240"
+"twitter","irrelevant","126879341559824384"
+"twitter","irrelevant","126879308663894016"
+"twitter","irrelevant","126879295195987968"
+"twitter","irrelevant","126879277722505216"
+"twitter","irrelevant","126879219484606464"
+"twitter","irrelevant","126879210177441792"
+"twitter","irrelevant","126879164258201601"
+"twitter","irrelevant","126879138605834240"
+"twitter","irrelevant","126879046071103488"
+"twitter","irrelevant","126878948431900672"
+"twitter","irrelevant","126878924411125760"
+"twitter","irrelevant","126878914625802241"
+"twitter","irrelevant","126878849656037377"
+"twitter","irrelevant","126878819826139136"
+"twitter","irrelevant","126878801970995200"
+"twitter","irrelevant","126878766675918848"
+"twitter","irrelevant","126878720098177024"
+"twitter","irrelevant","126878709192990720"
+"twitter","irrelevant","126878580494962688"
+"twitter","irrelevant","126878489935753216"
+"twitter","irrelevant","126878477424148480"
+"twitter","irrelevant","126878461464821760"
+"twitter","irrelevant","126878337342771201"
+"twitter","irrelevant","126878307617746944"
+"twitter","irrelevant","126878294867058688"
+"twitter","irrelevant","126878250541645824"
+"twitter","irrelevant","126878097030123520"
+"twitter","irrelevant","126878057662398464"
+"twitter","irrelevant","126878055091277824"
+"twitter","irrelevant","126877892855611392"
+"twitter","irrelevant","126877830150762496"
+"twitter","irrelevant","126877791911292928"
+"twitter","irrelevant","126877775830327296"
+"twitter","irrelevant","126877738702344192"
+"twitter","irrelevant","126877737410502659"
+"twitter","irrelevant","126877693563244544"
+"twitter","irrelevant","126877625867190273"
+"twitter","irrelevant","126877589955543040"
+"twitter","irrelevant","126877547899269120"
+"twitter","irrelevant","126877547878289408"
+"twitter","irrelevant","126877547710521344"
+"twitter","irrelevant","126877547576311808"
+"twitter","irrelevant","126877547244945408"
+"twitter","irrelevant","126877540928331777"
+"twitter","irrelevant","126877498981089280"
+"twitter","irrelevant","126877457675595776"
+"twitter","irrelevant","126877421919141889"
+"twitter","irrelevant","126877358740344832"
+"twitter","irrelevant","126877288418639872"
+"twitter","irrelevant","126877230587576320"
+"twitter","irrelevant","126877113348403201"
+"twitter","irrelevant","126876910788673536"
+"twitter","irrelevant","126876805134159872"
+"twitter","irrelevant","126876741913415681"
+"twitter","irrelevant","126876728206438400"
+"twitter","irrelevant","126876695989985280"
+"twitter","irrelevant","126876651551338496"
+"twitter","irrelevant","126876632874106881"
+"twitter","irrelevant","126876630491729920"
+"twitter","irrelevant","126876586170523648"
+"twitter","irrelevant","126876547025084416"
+"twitter","irrelevant","126876538653245440"
+"twitter","irrelevant","126876463965278208"
+"twitter","irrelevant","126876452326080512"
+"twitter","irrelevant","126876390388793344"
+"twitter","irrelevant","126876299556962305"
+"twitter","irrelevant","126876220460761088"
+"twitter","irrelevant","126876206003003392"
+"twitter","irrelevant","126876188294643712"
+"twitter","irrelevant","126876082593992704"
+"twitter","irrelevant","126876080538787841"
+"twitter","irrelevant","126876073337159680"
+"twitter","irrelevant","126876037689774080"
+"twitter","irrelevant","126875903266529281"
+"twitter","irrelevant","126875893481209858"
+"twitter","irrelevant","126875748484124672"
+"twitter","irrelevant","126875741869719552"
+"twitter","irrelevant","126875719912538112"
+"twitter","irrelevant","126875637720940544"
+"twitter","irrelevant","126875605395456001"
+"twitter","irrelevant","126875567168561152"
+"twitter","irrelevant","126875553314783232"
+"twitter","irrelevant","126875508662210560"
+"twitter","irrelevant","126875401887821825"
+"twitter","irrelevant","126875254437056512"
+"twitter","irrelevant","126875239438229504"
+"twitter","irrelevant","126875209897750528"
+"twitter","irrelevant","126875201072922624"
+"twitter","irrelevant","126875171008163840"
+"twitter","irrelevant","126875123457331200"
+"twitter","irrelevant","126875039621578752"
+"twitter","irrelevant","126875034135433216"
+"twitter","irrelevant","126875031983759360"
+"twitter","irrelevant","126875028359888896"
+"twitter","irrelevant","126875011221946368"
+"twitter","irrelevant","126874994180497408"
+"twitter","irrelevant","126874967433428992"
+"twitter","irrelevant","126874894943260674"
+"twitter","irrelevant","126874749321216000"
+"twitter","irrelevant","126874719772356608"
+"twitter","irrelevant","126874707780841472"
+"twitter","irrelevant","126874706119888896"
+"twitter","irrelevant","126874654055989248"
+"twitter","irrelevant","126874645587701760"
+"twitter","irrelevant","126874549550714880"
+"twitter","irrelevant","126874482798370816"
+"twitter","irrelevant","126874479291924480"
+"twitter","irrelevant","126874451500474368"
+"twitter","irrelevant","126874366775525377"
+"twitter","irrelevant","126874348110888960"
+"twitter","irrelevant","126874313423994880"
+"twitter","irrelevant","126874273280303104"
+"twitter","irrelevant","126874268779810817"
+"twitter","irrelevant","126874226450903041"
+"twitter","irrelevant","126874165105008640"
+"twitter","irrelevant","126874164039659520"
+"twitter","irrelevant","126874157840474113"
+"twitter","irrelevant","126874145408561152"
+"twitter","irrelevant","126874136017518593"
+"twitter","irrelevant","126874079239217153"
+"twitter","irrelevant","126874040261545985"
+"twitter","irrelevant","126874002084990976"
+"twitter","irrelevant","126873944501399552"
+"twitter","irrelevant","126873912515624960"
+"twitter","irrelevant","126873902499635200"
+"twitter","irrelevant","126873886938763264"
+"twitter","irrelevant","126873874909507584"
+"twitter","irrelevant","126873816319262721"
+"twitter","irrelevant","126873785512116225"
+"twitter","irrelevant","126873756437200896"
+"twitter","irrelevant","126873660442148865"
+"twitter","irrelevant","126873649725718528"
+"twitter","irrelevant","126873596080558082"
+"twitter","irrelevant","126873574895140864"
+"twitter","irrelevant","126873447912587264"
+"twitter","irrelevant","126873417126383616"
+"twitter","irrelevant","126873407487881217"
+"twitter","irrelevant","126873260385239040"
+"twitter","irrelevant","126873138079346688"
+"twitter","irrelevant","126873037982281729"
+"twitter","irrelevant","126873004494954496"
+"twitter","irrelevant","126872948022849536"
+"twitter","irrelevant","126872939638439936"
+"twitter","irrelevant","126872936568201216"
+"twitter","irrelevant","126872919480610816"
+"twitter","irrelevant","126872906738319360"
+"twitter","irrelevant","126872886232363008"
+"twitter","irrelevant","126872881417293824"
+"twitter","irrelevant","126872771929182209"
+"twitter","irrelevant","126872763221819392"
+"twitter","irrelevant","126872615380987905"
+"twitter","irrelevant","126872483394621440"
+"twitter","irrelevant","126872365211725824"
+"twitter","irrelevant","126872362007277568"
+"twitter","irrelevant","126872361462005760"
+"twitter","irrelevant","126872326095638528"
+"twitter","irrelevant","126872241693667328"
+"twitter","irrelevant","126872218025213952"
+"twitter","irrelevant","126872199620591617"
+"twitter","irrelevant","126872143593095168"
+"twitter","irrelevant","126872127986073600"
+"twitter","irrelevant","126872084679892993"
+"twitter","irrelevant","126872039138131968"
+"twitter","irrelevant","126872023552102400"
+"twitter","irrelevant","126871950185345024"
+"twitter","irrelevant","126871942799175682"
+"twitter","irrelevant","126871914579898369"
+"twitter","irrelevant","126871909177626626"
+"twitter","irrelevant","126871907302785024"
+"twitter","irrelevant","126871890890461184"
+"twitter","irrelevant","126871890320035841"
+"twitter","irrelevant","126871852185436160"
+"twitter","irrelevant","126871831583002625"
+"twitter","irrelevant","126871719557341184"
+"twitter","irrelevant","126871696887132160"
+"twitter","irrelevant","126871658991599616"
+"twitter","irrelevant","126871512195145729"
+"twitter","irrelevant","126871511305961473"
+"twitter","irrelevant","126871498832097280"
+"twitter","irrelevant","126871426107047936"
+"twitter","irrelevant","126871400500834304"
+"twitter","irrelevant","126871372109594624"
+"twitter","irrelevant","126871277813239808"
+"twitter","irrelevant","126871263250620416"
+"twitter","irrelevant","126871213732671488"
+"twitter","irrelevant","126871199362990081"
+"twitter","irrelevant","126871111416819712"
+"twitter","irrelevant","126871089929392128"
+"twitter","irrelevant","126871084950749184"
+"twitter","irrelevant","126870987559026688"
+"twitter","irrelevant","126870943330091008"
+"twitter","irrelevant","126870919590330369"
+"twitter","irrelevant","126870916729810944"
+"twitter","irrelevant","126870873176150017"
+"twitter","irrelevant","126870813839335424"
+"twitter","irrelevant","126870802837680128"
+"twitter","irrelevant","126870727617024000"
+"twitter","irrelevant","126870682767343617"
+"twitter","irrelevant","126870669492359169"
+"twitter","irrelevant","126870663758757888"
+"twitter","irrelevant","126870566916456448"
+"twitter","irrelevant","126870498452832256"
+"twitter","irrelevant","126870493402898434"
+"twitter","irrelevant","126870445659131904"
+"twitter","irrelevant","126870402910797826"
+"twitter","irrelevant","126870391774908416"
+"twitter","irrelevant","126870381867966465"
+"twitter","irrelevant","126870318764662784"
+"twitter","irrelevant","126870301471551489"
+"twitter","irrelevant","126870289861718016"
+"twitter","irrelevant","126870079798378497"
+"twitter","irrelevant","126870058050912256"
+"twitter","irrelevant","126870004766478336"
+"twitter","irrelevant","126869995610324993"
+"twitter","irrelevant","126869990535217152"
+"twitter","irrelevant","126869983769788417"
+"twitter","irrelevant","126869957085634560"
+"twitter","irrelevant","126869921996091392"
+"twitter","irrelevant","126869901922152448"
+"twitter","irrelevant","126869798419300352"
+"twitter","irrelevant","126869762763522049"
+"twitter","irrelevant","126869705620332544"
+"twitter","irrelevant","126869683449233408"
+"twitter","irrelevant","126869567061495808"
+"twitter","irrelevant","126869527416938496"
+"twitter","irrelevant","126869515983257600"
+"twitter","irrelevant","126869499046670336"
+"twitter","irrelevant","126869440276078592"
+"twitter","irrelevant","126869422857142272"
+"twitter","irrelevant","126869344851472384"
+"twitter","irrelevant","126869327222804480"
+"twitter","irrelevant","126869308029669376"
+"twitter","irrelevant","126869231693348865"
+"twitter","irrelevant","126869213980786688"
+"twitter","irrelevant","126869163250679810"
+"twitter","irrelevant","126869159341604866"
+"twitter","irrelevant","126869146045644800"
+"twitter","irrelevant","126869110901575680"
+"twitter","irrelevant","126869094610907136"
+"twitter","irrelevant","126869070292320256"
+"twitter","irrelevant","126869031180451840"
+"twitter","irrelevant","126869021969756161"
+"twitter","irrelevant","126868980664250369"
+"twitter","irrelevant","126868924590600192"
+"twitter","irrelevant","126868846517817344"
+"twitter","irrelevant","126868773562089472"
+"twitter","irrelevant","126868680003960833"
+"twitter","irrelevant","126868673213378560"
+"twitter","irrelevant","126868623645089792"
+"twitter","irrelevant","126868612408553472"
+"twitter","irrelevant","126868611922010112"
+"twitter","irrelevant","126868601721462784"
+"twitter","irrelevant","126868586882007041"
+"twitter","irrelevant","126868539784183808"
+"twitter","irrelevant","126868539641577472"
+"twitter","irrelevant","126868472515928064"
+"twitter","irrelevant","126868462336348162"
+"twitter","irrelevant","126868364927827968"
+"twitter","irrelevant","126868357625548800"
+"twitter","irrelevant","126868352835661824"
+"twitter","irrelevant","126868349396324352"
+"twitter","irrelevant","126868328659693568"
+"twitter","irrelevant","126868322431143937"
+"twitter","irrelevant","126868256479920128"
+"twitter","irrelevant","126868229409865728"
+"twitter","irrelevant","126868208924884993"
+"twitter","irrelevant","126868204881580032"
+"twitter","irrelevant","126868198032281600"
+"twitter","irrelevant","126868196295843840"
+"twitter","irrelevant","126868167921373185"
+"twitter","irrelevant","126868148879241216"
+"twitter","irrelevant","126868137827250176"
+"twitter","irrelevant","126868136531197952"
+"twitter","irrelevant","126867955844788225"
+"twitter","irrelevant","126867858461433857"
+"twitter","irrelevant","126867818242248704"
+"twitter","irrelevant","126867784494891008"
+"twitter","irrelevant","126867765343686656"
+"twitter","irrelevant","126867764282540032"
+"twitter","irrelevant","126867734490382336"
+"twitter","irrelevant","126867715947368449"
+"twitter","irrelevant","126867611546943490"
+"twitter","irrelevant","126867510288072705"
+"twitter","irrelevant","126867253609234433"
+"twitter","irrelevant","126867181358170112"
+"twitter","irrelevant","126867170742374400"
+"twitter","irrelevant","126867149775044608"
+"twitter","irrelevant","126867019646771201"
+"twitter","irrelevant","126867015620239360"
+"twitter","irrelevant","126867000030007296"
+"twitter","irrelevant","126866948909842432"
+"twitter","irrelevant","126866905096130560"
+"twitter","irrelevant","126866882979573760"
+"twitter","irrelevant","126866835449708544"
+"twitter","irrelevant","126866833537110016"
+"twitter","irrelevant","126866827715420160"
+"twitter","irrelevant","126866789962498048"
+"twitter","irrelevant","126866773705375744"
+"twitter","irrelevant","126866759792852992"
+"twitter","irrelevant","126866701877903361"
+"twitter","irrelevant","126866689236283392"
+"twitter","irrelevant","126866515122331648"
+"twitter","irrelevant","126866508818296832"
+"twitter","irrelevant","126866474806673408"
+"twitter","irrelevant","126866454845984768"
+"twitter","irrelevant","126866426521849857"
+"twitter","irrelevant","126866385757417472"
+"twitter","irrelevant","126866312130609152"
+"twitter","irrelevant","126866267998134272"
+"twitter","irrelevant","126866222355722240"
+"twitter","irrelevant","126866176889454592"
+"twitter","irrelevant","126866076016447489"
+"twitter","irrelevant","126866058337460224"
+"twitter","irrelevant","126866027316383744"
+"twitter","irrelevant","126866021402427392"
+"twitter","irrelevant","126865987365634048"
+"twitter","irrelevant","126865977970401280"
+"twitter","irrelevant","126865966624813057"
+"twitter","irrelevant","126865936115445761"
+"twitter","irrelevant","126865932806139904"
+"twitter","irrelevant","126865890665967618"
+"twitter","irrelevant","126865868599726080"
+"twitter","irrelevant","126865864086667264"
+"twitter","irrelevant","126865777423941633"
+"twitter","irrelevant","126865704380153856"
+"twitter","irrelevant","126865703969099776"
+"twitter","irrelevant","126865661602447362"
+"twitter","irrelevant","126865633869709312"
+"twitter","irrelevant","126865576093171712"
+"twitter","irrelevant","126865573228445696"
+"twitter","irrelevant","126865566236553216"
+"twitter","irrelevant","126865487584968704"
+"twitter","irrelevant","126865416671862785"
+"twitter","irrelevant","126865410598502400"
+"twitter","irrelevant","126865389861871617"
+"twitter","irrelevant","126865304277090304"
+"twitter","irrelevant","126865296463118337"
+"twitter","irrelevant","126865215492071424"
+"twitter","irrelevant","126865127977918464"
+"twitter","irrelevant","126865092565417984"
+"twitter","irrelevant","126865045949919233"
+"twitter","irrelevant","126865038676987904"
+"twitter","irrelevant","126865014215802880"
+"twitter","irrelevant","126864993760182272"
+"twitter","irrelevant","126864955285831680"
+"twitter","irrelevant","126864954140803072"
+"twitter","irrelevant","126864917566455809"
+"twitter","irrelevant","126864908582260736"
+"twitter","irrelevant","126864848649846784"
+"twitter","irrelevant","126864847399944192"
+"twitter","irrelevant","126864712766980096"
+"twitter","irrelevant","126864692470751232"
+"twitter","irrelevant","126864673416032256"
+"twitter","irrelevant","126864623252144128"
+"twitter","irrelevant","126864485943214080"
+"twitter","irrelevant","126864460957749248"
+"twitter","irrelevant","126864441999507456"
+"twitter","irrelevant","126864344125419520"
+"twitter","irrelevant","126864341071962112"
+"twitter","irrelevant","126864316120047617"
+"twitter","irrelevant","126864259027181568"
+"twitter","irrelevant","126864201804300289"
+"twitter","irrelevant","126864188969725953"
+"twitter","irrelevant","126864074045784064"
+"twitter","irrelevant","126864072447766528"
+"twitter","irrelevant","126864016613183489"
+"twitter","irrelevant","126863938339094531"
+"twitter","irrelevant","126863859075121152"
+"twitter","irrelevant","126863845326209024"
+"twitter","irrelevant","126863808177254400"
+"twitter","irrelevant","126863734265217025"
+"twitter","irrelevant","126863732931432448"
+"twitter","irrelevant","126863723179675648"
+"twitter","irrelevant","126863722412118017"
+"twitter","irrelevant","126863708889690112"
+"twitter","irrelevant","126863696726200320"
+"twitter","irrelevant","126863647845789696"
+"twitter","irrelevant","126863644968501250"
+"twitter","irrelevant","126863594850746369"
+"twitter","irrelevant","126863579914829824"
+"twitter","irrelevant","126863563133431809"
+"twitter","irrelevant","126863551355822080"
+"twitter","irrelevant","126863538626109440"
+"twitter","irrelevant","126863416211156992"
+"twitter","irrelevant","126863368492564481"
+"twitter","irrelevant","126863360779227136"
+"twitter","irrelevant","126863360686956544"
+"twitter","irrelevant","126863321327611904"
+"twitter","irrelevant","126863319226265600"
+"twitter","irrelevant","126863283658567680"
+"twitter","irrelevant","126863189844566016"
+"twitter","irrelevant","126863136111333376"
+"twitter","irrelevant","126863118398787584"
+"twitter","irrelevant","126863108802232320"
+"twitter","irrelevant","126863108688986112"
+"twitter","irrelevant","126863078112509952"
+"twitter","irrelevant","126863012433899520"
+"twitter","irrelevant","126862981333139457"
+"twitter","irrelevant","126862918921891840"
+"twitter","irrelevant","126862904514445312"
+"twitter","irrelevant","126862853822099456"
+"twitter","irrelevant","126862852681240576"
+"twitter","irrelevant","126862832489861121"
+"twitter","irrelevant","126862813355442176"
+"twitter","irrelevant","126862789649248256"
+"twitter","irrelevant","126862725933576192"
+"twitter","irrelevant","126862722028666880"
+"twitter","irrelevant","126862722007699456"
+"twitter","irrelevant","126862721793789953"
+"twitter","irrelevant","126862721609248769"
+"twitter","irrelevant","126862719927332864"
+"twitter","irrelevant","126862668039598080"
+"twitter","irrelevant","126862654324211713"
+"twitter","irrelevant","126862632648065025"
+"twitter","irrelevant","126862595117424641"
+"twitter","irrelevant","126862536162295808"
+"twitter","irrelevant","126862464003477504"
+"twitter","irrelevant","126862447041712128"
+"twitter","irrelevant","126862429912186880"
+"twitter","irrelevant","126862343148802048"
+"twitter","irrelevant","126862329269858305"
+"twitter","irrelevant","126862315990679552"
+"twitter","irrelevant","126862308302532608"
+"twitter","irrelevant","126862269148692481"
+"twitter","irrelevant","126862268725080065"
+"twitter","irrelevant","126862217013497856"
+"twitter","irrelevant","126862201440043008"
+"twitter","irrelevant","126862151393619968"
+"twitter","irrelevant","126862151343284224"
+"twitter","irrelevant","126862150265352193"
+"twitter","irrelevant","126862116874489856"
+"twitter","irrelevant","126861992001679360"
+"twitter","irrelevant","126861988004499457"
+"twitter","irrelevant","126861979762696192"
+"twitter","irrelevant","126861960137539585"
+"twitter","irrelevant","126861955762888706"
+"twitter","irrelevant","126861933071704065"
+"twitter","irrelevant","126861782378745856"
+"twitter","irrelevant","126861771314184192"
+"twitter","irrelevant","126861717790658561"
+"twitter","irrelevant","126861611741872128"
+"twitter","irrelevant","126861593249185793"
+"twitter","irrelevant","126861589122002944"
+"twitter","irrelevant","126861539511775232"
+"twitter","irrelevant","126861496109121536"
+"twitter","irrelevant","126861493059854336"
+"twitter","irrelevant","126861480128819201"
+"twitter","irrelevant","126861380992241664"
+"twitter","irrelevant","126861357831303168"
+"twitter","irrelevant","126861252134830080"
+"twitter","irrelevant","126861245109387264"
+"twitter","irrelevant","126861233138827264"
+"twitter","irrelevant","126861218592980992"
+"twitter","irrelevant","126861187605471232"
+"twitter","irrelevant","126861118684676096"
+"twitter","irrelevant","126861100120686592"
+"twitter","irrelevant","126860970583797760"
+"twitter","irrelevant","126860953450065920"
+"twitter","irrelevant","126860911557345280"
+"twitter","irrelevant","126860890749419521"
+"twitter","irrelevant","126860877344407552"
+"twitter","irrelevant","126860835892109312"
+"twitter","irrelevant","126860830942834689"
+"twitter","irrelevant","126860808251637762"
+"twitter","irrelevant","126860789536657408"
+"twitter","irrelevant","126860754912681985"
+"twitter","irrelevant","126860740689797120"
+"twitter","irrelevant","126860719860883458"
+"twitter","irrelevant","126860653167263744"
+"twitter","irrelevant","126860610020446209"
+"twitter","irrelevant","126860576600227841"
+"twitter","irrelevant","126860372987740160"
+"twitter","irrelevant","126860329463447552"
+"twitter","irrelevant","126860270181171201"
+"twitter","irrelevant","126860146289803265"
+"twitter","irrelevant","126860127167987712"
+"twitter","irrelevant","126860114610241536"
+"twitter","irrelevant","126860109090521088"
+"twitter","irrelevant","126859990513369088"
+"twitter","irrelevant","126859860640940032"
+"twitter","irrelevant","126859857604247552"
+"twitter","irrelevant","126859846506127360"
+"twitter","irrelevant","126859794383515649"
+"twitter","irrelevant","126859777308495872"
+"twitter","irrelevant","126859668151738368"
+"twitter","irrelevant","126859627051757568"
+"twitter","irrelevant","126859610006110208"
+"twitter","irrelevant","126859509414100992"
+"twitter","irrelevant","126859503495938048"
+"twitter","irrelevant","126859490061598720"
+"twitter","irrelevant","126859443127332864"
+"twitter","irrelevant","126859428455657472"
+"twitter","irrelevant","126859363079041024"
+"twitter","irrelevant","126859354614939648"
+"twitter","irrelevant","126859286558158849"
+"twitter","irrelevant","126859257856540672"
+"twitter","irrelevant","126859211593351168"
+"twitter","irrelevant","126859155175772161"
+"twitter","irrelevant","126859077304320001"
+"twitter","irrelevant","126858835058098176"
+"twitter","irrelevant","126858789868670976"
+"twitter","irrelevant","126858639855206400"
+"twitter","irrelevant","126858516655898625"
+"twitter","irrelevant","126858260216160256"
+"twitter","irrelevant","126858248325308416"
+"twitter","irrelevant","126858186962644992"
+"twitter","irrelevant","126858148878368769"
+"twitter","irrelevant","126858034466131969"
+"twitter","irrelevant","126858032951996416"
+"twitter","irrelevant","126858004690767872"
+"twitter","irrelevant","126857921068941314"
+"twitter","irrelevant","126857918929838080"
+"twitter","irrelevant","126857746200014849"
+"twitter","irrelevant","126857736238530560"
+"twitter","irrelevant","126857518591901698"
+"twitter","irrelevant","126857511230902272"
+"twitter","irrelevant","126857421321797634"
+"twitter","irrelevant","126857383715676160"
+"twitter","irrelevant","126857361515216897"
+"twitter","irrelevant","126857211921174528"
+"twitter","irrelevant","126857080513638400"
+"twitter","irrelevant","126857071667847168"
+"twitter","irrelevant","126857049920385024"
+"twitter","irrelevant","126856764242137088"
+"twitter","irrelevant","126856732331884545"
+"twitter","irrelevant","126856603021484032"
+"twitter","irrelevant","126856425371746304"
+"twitter","irrelevant","126856274531991552"
+"twitter","irrelevant","126856135918620673"
+"twitter","irrelevant","126856097863708672"
+"twitter","irrelevant","126856097431699456"
+"twitter","irrelevant","126855687060987904"
+"twitter","irrelevant","126855171702661120"
+"twitter","irrelevant","126854999442587648"
+"twitter","irrelevant","126854818101858304"
+"twitter","irrelevant","126854423317188608"
diff --git a/ch06/data/missing.tsv b/ch06/data/missing.tsv
new file mode 100644
index 00000000..31778a5e
--- /dev/null
+++ b/ch06/data/missing.tsv
@@ -0,0 +1,736 @@
+126366123368267776
+126354605130002432
+126366123368267776
+126354605130002432
+126350948548354048
+126366123368267776
+126354605130002432
+126350948548354048
+126344048637259776
+126312877916307458
+126267185025916928
+126093298619252737
+126059399319003136
+126039929523404801
+126015087386431488
+125979338846900224
+125978473712979969
+125954443643588608
+125932869389524992
+125930171562852353
+125910633731461120
+125909565031198720
+125907732388790272
+125898611572740097
+125816853867151360
+125688922410975232
+125685656415510528
+125673004511412224
+125665606853861376
+125616280215617537
+125595292304281601
+125536884813336576
+125533599737978882
+125458901192810496
+125445056218923008
+125422502284505088
+125402636764712960
+125330595302744064
+125279447669669888
+125251672896323584
+126328782700285952
+126325125749542913
+126312535203921920
+126311879218966529
+126308005779210241
+126304942049853441
+126297241190281216
+126295434862936064
+126282994821509120
+126270073420791810
+126264313563459585
+126239832895795200
+126156590662422528
+126140389873827841
+126126605344047105
+126082198720888833
+126073788323479552
+126070647125327872
+126060639268507649
+126042137740574720
+126042022900547584
+126033747991736320
+126012404332113920
+126012089415380992
+126006088725303296
+126004661248471040
+125974810021998595
+125969502587465729
+125934808592433153
+125836461936361472
+125824054958637056
+125814380871946240
+125812985301172224
+125811345064067072
+125794882257305600
+125728250579259392
+125722610179907584
+125711996074209280
+125694815743651840
+125692685033021441
+125692532750430209
+125656618326175745
+125619303356710912
+125588697872728065
+125578269197217792
+125547255947198465
+125441732941840385
+125435218017525760
+125420263687995392
+125400161886277632
+125395636219678720
+125380163302199296
+125355139409252352
+125347619072512000
+125330337847975937
+125304159581900800
+125295729139908608
+125294978623746048
+125271422431014914
+125268117680160768
+125245246136258561
+125212404299735040
+126339328434651136
+126329388320043008
+126314701721309184
+126312423132102657
+126307117274644480
+126298592364331008
+126293725155569664
+126292233963053056
+126287512296632320
+126192452297170945
+126186955296878592
+126147867478982656
+126125799744094208
+126102213956337664
+126082898783780864
+126081812236738560
+126054725727698944
+126044185815040000
+126043954641780736
+126021436149211136
+126017685246050304
+126006669535744000
+125998732046123009
+125996412252078080
+125974497546338304
+125971256335024128
+125957972466737152
+125950026181648385
+125949784677810176
+125947460592996352
+125725019178409984
+125724524732882944
+125714971261812736
+125706125764083712
+125687710705926144
+125686643960193024
+125667159547461633
+125663914552868864
+125643523792969728
+125640679325052929
+125632687879884800
+125607526967410689
+125559846870323200
+125559232157327360
+125551370676862976
+125532364406398977
+125532202057482240
+125493125098635265
+125459423870197760
+125459375245635584
+125417324621737985
+125374041556127744
+125365963972542464
+125365264656236544
+125364122853453824
+125354296752619520
+125338231280644096
+125312789345599489
+125287442407362560
+125276525472911360
+125273317673414656
+125270965268643840
+125227689895407616
+125206271560384512
+126355839274594304
+126329876935479296
+126312509983559681
+126303310054559744
+126298834203713536
+126298134212120577
+126297754799587328
+126252530819809280
+126228762596618240
+126131070050639874
+126111632773480448
+126093841232166912
+126061579233017856
+126052637014630400
+126007147199868928
+125993438205321218
+125993305325576193
+125992545552576512
+125990754769309696
+125909170074562561
+125873952953352192
+125641150186000384
+125580342244548608
+125447473786392576
+125445752083329025
+125445407592546304
+125287180561166336
+126528978239496194
+126521613259771904
+126521382220738560
+126511837796765696
+126501732443361280
+126499581520384001
+126495221373607936
+126494344566943744
+126493683515260928
+126493037110099968
+126491589852930048
+126491177754165252
+126487901084196864
+126487323071365120
+126487172487462912
+126524840126582784
+126505113362776064
+126497333209858049
+126491825660887040
+126490767345725441
+126489813468721152
+126489698314104832
+126531161215344640
+126529287858819072
+126528342613700608
+126524843339431936
+126524841544257536
+126524050037145601
+126523519357030400
+126521718272573440
+126521573330010112
+126518280633593856
+126512460629946370
+126508495255257088
+126506073099218945
+126504479595044864
+126503755331022849
+126501463487815680
+126501209887621121
+126499307284217856
+126499217807122433
+126497642493648897
+126496131353018368
+126494715024637952
+126494709605609472
+126493625285750784
+126492726601584640
+126492692984238080
+126491267411619840
+126490760114733056
+126490235877081088
+126490118050684928
+126486926827065344
+126533452618137600
+126533318928896001
+126533008760111104
+126531797419950080
+126528826279866369
+126526602686369792
+126522478280450048
+126519390245109760
+126515137212456960
+126513979706183682
+126513949163257856
+126509472834269184
+126508734884220929
+126502546486464512
+126495448436453377
+126494774428565504
+126493833608441856
+126493282355261440
+126492011896373249
+126489808913694720
+126488234359406593
+126487140317147136
+126774092274741248
+126764085609111552
+126748156003221504
+126688181688078338
+126782820709441536
+126781543879421952
+126686283769712640
+126686075287642112
+126672651459633152
+126644615280463872
+126643061567668224
+126615502259879938
+126606967736238080
+126606323625361408
+126510880761458688
+126799514270040064
+126797364823457792
+126786021340884992
+126781384600727552
+126776771159916546
+126768749243146240
+126767049312714752
+126764605623111680
+126747117120274432
+126746562062848001
+126741817365504000
+126734902514286593
+126727087770963968
+126725006729281536
+126723741651714048
+126719724376702976
+126715414716747777
+126713393599086592
+126707947744145409
+126705491685556225
+126700451163942912
+126698757503991808
+126690446872297473
+126689592832294913
+126688185228079104
+126687847762771970
+126685318899449856
+126679050742800385
+126678492464152576
+126677952644648960
+126677050265305088
+126671664011419648
+126668119354781696
+126664410029764608
+126651699380232193
+126651363718467585
+126637516148318208
+126636379324170240
+126635080100757504
+126634780954595328
+126625578567729152
+126615663119843328
+126615649077301248
+126613447487127553
+126604828150480896
+126603598724476928
+126602791807496192
+126601553409875968
+126596088999313408
+126595447455367168
+126586994536091648
+126572275876106240
+126567868065263616
+126563375189725184
+126545157708451842
+126542412226105345
+126466763746574337
+126767886814543872
+126765812534099969
+126759115488903168
+126754698102439936
+126749255498088448
+126746549756768258
+126737489229656066
+126710657948196864
+126694595026366464
+126691436744810496
+126688740826550272
+126688659868106752
+126687120071999490
+126679305169289216
+126678301539446784
+126675085758173187
+126671792877223936
+126669192157073408
+126666155376132097
+126662553316245504
+126656806368968704
+126655411871940608
+126646647856955392
+126646439924334592
+126644764383780865
+126622031163822081
+126622030006202368
+126614145662599169
+126614144299446272
+126605409590063104
+126573790980358145
+126572846272086016
+126560733126328321
+126559394136723456
+126546297082748928
+126544031365873664
+126532894272397312
+126491523020898304
+126489614272827392
+126883211006590976
+126882408963391488
+126878670685085696
+126876733113778176
+126875611095502848
+126875378013843456
+126872175490764802
+126863862791282688
+126863766334873600
+126858606695030784
+126878307693244417
+126877679826894849
+126874389210861568
+126873860745330689
+126872684658294784
+126872325663621120
+126872265328562176
+126870618825179136
+126868404182319105
+126867983455879168
+126867188677218304
+126864203557507072
+126863505851809793
+126858852976181250
+126857136855719936
+126883005263392768
+126882559522111488
+126882518799626241
+126882453943103488
+126882193967550464
+126881072167399425
+126880926268526592
+126880518410215425
+126879768481247232
+126879759316697088
+126879221724356608
+126878654927077376
+126878539487252480
+126877936514105344
+126877612214726657
+126877527674322944
+126876676822007809
+126875958694260736
+126875579172663296
+126875301157404672
+126875080084033536
+126875035817349120
+126874922017488896
+126874883207610368
+126874543770976256
+126874384139948032
+126873749558530049
+126873190420062208
+126872871254491137
+126872821375827968
+126872360052736000
+126872201663229952
+126872199838699520
+126871747759837185
+126871411431194624
+126870940570226688
+126870651112927232
+126870354722439169
+126869816584839168
+126869769608642560
+126869448069095424
+126869197413285888
+126868633195524096
+126867869236604928
+126867774738927617
+126867417401012225
+126867201776037888
+126867147673714688
+126866981377941504
+126866963887689728
+126866234712145920
+126866190193790976
+126865038479867904
+126864793373122560
+126864141561507840
+126864131289661441
+126863876066254848
+126863814619709441
+126863409680625664
+126863392764989440
+126863206193963008
+126863072269840384
+126862999720951808
+126862946310692864
+126862552025137152
+126862174277738496
+126862106443255810
+126862030979334144
+126861895436206081
+126861820169437184
+126861727374643200
+126861294312759296
+126861285307584514
+126861181108498432
+126860898567589888
+126860800978722816
+126860504240107520
+126860495079735296
+126860390356357121
+126859485322035200
+126857800411398144
+126857481006751744
+126857475034071040
+126856857527648256
+126856848778342402
+126856541453291520
+126855856414404608
+126855838047547392
+126854358817181696
+126853667738497025
+126883583691472896
+126883226760384512
+126883074888826880
+126882954629742592
+126882436930994176
+126882291757752320
+126881216468226048
+126881035748261888
+126880976096858112
+126880815928975360
+126880468833550336
+126878924411125760
+126878097030123520
+126877625867190273
+126877421919141889
+126876651551338496
+126876632874106881
+126876080538787841
+126875605395456001
+126875031983759360
+126875028359888896
+126873649725718528
+126872483394621440
+126871426107047936
+126871400500834304
+126871213732671488
+126870727617024000
+126870682767343617
+126870318764662784
+126869422857142272
+126869327222804480
+126869163250679810
+126869146045644800
+126868680003960833
+126868673213378560
+126868539641577472
+126868148879241216
+126865661602447362
+126865092565417984
+126865038676987904
+126864917566455809
+126864623252144128
+126864344125419520
+126864072447766528
+126863859075121152
+126863723179675648
+126863644968501250
+126863416211156992
+126861960137539585
+126861593249185793
+126861357831303168
+126860740689797120
+126860329463447552
+126859668151738368
+126859077304320001
+126858148878368769
+126858034466131969
+126857049920385024
+126856274531991552
+126855171702661120
+126560733126328321
+126559394136723456
+126546297082748928
+126544031365873664
+126532894272397312
+126491523020898304
+126489614272827392
+126883211006590976
+126882408963391488
+126878670685085696
+126876733113778176
+126875611095502848
+126875378013843456
+126872175490764802
+126863862791282688
+126863766334873600
+126858606695030784
+126878307693244417
+126877679826894849
+126874389210861568
+126873860745330689
+126872684658294784
+126872325663621120
+126872265328562176
+126870618825179136
+126868404182319105
+126867983455879168
+126867188677218304
+126864203557507072
+126863505851809793
+126858852976181250
+126857136855719936
+126883005263392768
+126882559522111488
+126882518799626241
+126882453943103488
+126882193967550464
+126881072167399425
+126880926268526592
+126880518410215425
+126879768481247232
+126879759316697088
+126879221724356608
+126878654927077376
+126878539487252480
+126877936514105344
+126877612214726657
+126877527674322944
+126876676822007809
+126875958694260736
+126875579172663296
+126875301157404672
+126875080084033536
+126875035817349120
+126874922017488896
+126874883207610368
+126874543770976256
+126874384139948032
+126873749558530049
+126873190420062208
+126872871254491137
+126872821375827968
+126872360052736000
+126872201663229952
+126872199838699520
+126871747759837185
+126871411431194624
+126870940570226688
+126870651112927232
+126870354722439169
+126869816584839168
+126869769608642560
+126869448069095424
+126869197413285888
+126868633195524096
+126867869236604928
+126867774738927617
+126867417401012225
+126867201776037888
+126867147673714688
+126866981377941504
+126866963887689728
+126866234712145920
+126866190193790976
+126865038479867904
+126864793373122560
+126864141561507840
+126864131289661441
+126863876066254848
+126863814619709441
+126863409680625664
+126863392764989440
+126863206193963008
+126863072269840384
+126862999720951808
+126862946310692864
+126862552025137152
+126862174277738496
+126862106443255810
+126862030979334144
+126861895436206081
+126861820169437184
+126861727374643200
+126861294312759296
+126861285307584514
+126861181108498432
+126860898567589888
+126860800978722816
+126860504240107520
+126860495079735296
+126860390356357121
+126859485322035200
+126857800411398144
+126857481006751744
+126857475034071040
+126856857527648256
+126856848778342402
+126856541453291520
+126855856414404608
+126855838047547392
+126854358817181696
+126853667738497025
+126883583691472896
+126883226760384512
+126883074888826880
+126882954629742592
+126882436930994176
+126882291757752320
+126881216468226048
+126881035748261888
+126880976096858112
+126880815928975360
+126880468833550336
+126878924411125760
+126878097030123520
+126877625867190273
+126877421919141889
+126876651551338496
+126876632874106881
+126876080538787841
+126875605395456001
+126875031983759360
+126875028359888896
+126873649725718528
+126872483394621440
+126871426107047936
+126871400500834304
+126871213732671488
+126870727617024000
+126870682767343617
+126870318764662784
+126869422857142272
+126869327222804480
+126869163250679810
+126869146045644800
+126868680003960833
+126868673213378560
+126868539641577472
+126868148879241216
+126865661602447362
+126865092565417984
+126865038676987904
+126864917566455809
+126864623252144128
+126864344125419520
+126864072447766528
+126863859075121152
+126863723179675648
+126863644968501250
+126863416211156992
+126861960137539585
+126861593249185793
+126861357831303168
+126860740689797120
+126860329463447552
+126859668151738368
+126859077304320001
+126858148878368769
+126858034466131969
+126857049920385024
+126856274531991552
+126855171702661120
diff --git a/ch06/data/not_authorized.tsv b/ch06/data/not_authorized.tsv
new file mode 100644
index 00000000..5bb4f31b
--- /dev/null
+++ b/ch06/data/not_authorized.tsv
@@ -0,0 +1,371 @@
+126213333123743744
+126079414986485761
+126076743613284354
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125799384976863232
+125681375058735104
+125675806977556480
+125673358418391041
+125659125886623744
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125799384976863232
+125681375058735104
+125675806977556480
+125673358418391041
+125659125886623744
+125561930416013312
+125475953509015552
+125371779039502336
+125368089159286784
+125334519254482944
+125309427422203904
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125799384976863232
+125681375058735104
+125675806977556480
+125673358418391041
+125659125886623744
+125561930416013312
+125475953509015552
+125371779039502336
+125368089159286784
+125334519254482944
+125309427422203904
+125204228967903232
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125799384976863232
+125681375058735104
+125675806977556480
+125673358418391041
+125659125886623744
+125561930416013312
+125475953509015552
+125371779039502336
+125368089159286784
+125334519254482944
+125309427422203904
+125204228967903232
+126394795802370049
+126386085164101634
+126382776072146944
+126380323733909504
+126317201962700800
+126229089651654656
+126186795808456704
+126110770864979968
+126039090578735104
+126029114850295809
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125799384976863232
+125681375058735104
+125675806977556480
+125673358418391041
+125659125886623744
+125561930416013312
+125475953509015552
+125371779039502336
+125368089159286784
+125334519254482944
+125309427422203904
+125204228967903232
+126394795802370049
+126386085164101634
+126382776072146944
+126380323733909504
+126317201962700800
+126229089651654656
+126186795808456704
+126110770864979968
+126039090578735104
+126029114850295809
+125994997609803776
+125992594395250688
+125988651426512899
+125981074114359297
+125980615664336896
+125958702455988225
+125932876721168384
+125918906215968771
+125725274317914112
+125708240225959936
+125641351848136704
+125630016485732352
+125629788563050496
+125538769632886784
+125347618862792705
+125305567148388352
+125196751387889665
+126213333123743744
+126079414986485761
+126076743613284354
+126049183865114624
+125633065757310976
+125264731035537409
+126153311521996800
+126121175926571009
+125988395787882497
+125954651152592896
+125930962545672192
+125910538550124545
+125797001337122817
+125232405517844481
+126534770095169536
+126520518609350656
+126516914678808578
+126494834449063936
+126494280318582784
+126494100252925954
+126492852615262208
+126488447098695680
+126488384410619906
+126487332865056768
+126532210210783232
+126520550876127232
+126505594290057216
+126497514168922112
+126494895501348864
+126491509527805952
+126528316978102272
+126528078057963520
+126523549493112832
+126520920352358401
+126510284536942592
+126504105530236928
+126499521344712704
+126497100866387969
+126496853742198784
+126494691016441857
+126494569184505856
+126493312650719232
+126487788433584129
+126534127435530240
+126529490582118400
+126528938326495232
+126526465280970752
+126526113131413504
+126519715085549568
+126511257170886656
+126504285436514304
+126497446955188224
+126495762568851456
+126495208505479168
+126494166145437696
+126493860804308992
+126492542610051072
+126490549367738368
+126484213737340928
+126784810755690496
+126700014385897472
+126635317108289536
+126795256225210368
+126789710705213440
+126728277896347649
+126674460131606529
+126671006302617600
+126593636627513344
+126519595682119681
+126796467213058048
+126734290850557952
+126726063484178432
+126679463839801344
+126673062258147328
+126637471676104704
+126622818220785664
+126622165595459584
+126612152579657728
+126506057613848576
+126505970317787136
+126495306681548800
+126879662851887104
+126877171926040576
+126876654118240257
+126876107881455616
+126867350476697601
+126863084433326080
+126857095088840706
+126883243726344193
+126881376074076161
+126858607789740032
+126883335875203072
+126883013236752384
+126882832319651840
+126878130353876992
+126877869547855872
+126875416760815616
+126875059477426176
+126870550546096128
+126868828457144321
+126868429796933632
+126868271625539585
+126867067776405506
+126866413053939712
+126865888724004864
+126865837800951808
+126865038085591041
+126864886402777088
+126864861576704000
+126863772877996034
+126863571912114177
+126862618836221954
+126860955605934081
+126859710740701185
+126853913591808002
+126882080050262016
+126881227729928193
+126879417220874240
+126875034135433216
+126874145408561152
+126873260385239040
+126872615380987905
+126872361462005760
+126872241693667328
+126872199620591617
+126869762763522049
+126868924590600192
+126868586882007041
+126868349396324352
+126867170742374400
+126866474806673408
+126866312130609152
+126865987365634048
+126864954140803072
+126864673416032256
+126863938339094531
+126862853822099456
+126862343148802048
+126860270181171201
+126860114610241536
+126859857604247552
+126506057613848576
+126505970317787136
+126495306681548800
+126879662851887104
+126877171926040576
+126876654118240257
+126876107881455616
+126867350476697601
+126863084433326080
+126857095088840706
+126883243726344193
+126881376074076161
+126858607789740032
+126883335875203072
+126883013236752384
+126882832319651840
+126878130353876992
+126877869547855872
+126875416760815616
+126875059477426176
+126870550546096128
+126868828457144321
+126868429796933632
+126868271625539585
+126867067776405506
+126866413053939712
+126865888724004864
+126865837800951808
+126865038085591041
+126864886402777088
+126864861576704000
+126863772877996034
+126863571912114177
+126862618836221954
+126860955605934081
+126859710740701185
+126853913591808002
+126882080050262016
+126881227729928193
+126879417220874240
+126875034135433216
+126874145408561152
+126873260385239040
+126872615380987905
+126872361462005760
+126872241693667328
+126872199620591617
+126869762763522049
+126868924590600192
+126868586882007041
+126868349396324352
+126867170742374400
+126866474806673408
+126866312130609152
+126865987365634048
+126864954140803072
+126864673416032256
+126863938339094531
+126862853822099456
+126862343148802048
+126860270181171201
+126860114610241536
+126859857604247552
diff --git a/ch06/install.py b/ch06/install.py
index 4caf90dc..be61f485 100644
--- a/ch06/install.py
+++ b/ch06/install.py
@@ -11,26 +11,19 @@
#
# Pulls tweet data from Twitter because ToS prevents distributing it directly.
#
-# Right now we use unauthenticated requests, which are rate-limited to 150/hr.
-# We use 125/hr to stay safe.
-#
-# We could more than double the download speed by using authentication with
-# OAuth logins. But for now, this is too much of a PITA to implement. Just let
-# the script run over a weekend and you'll have all the data.
-#
# - Niek Sanders
# njs@sananalytics.com
# October 20, 2011
#
#
-# Excuse the ugly code. I threw this together as quickly as possible and I
-# don't normally code in Python.
-#
# In Sanders' original form, the code was using Twitter API 1.0.
# Now that Twitter moved to 1.1, we had to make a few changes.
# Cf. twitterauth.py for the details.
+# Regarding rate limiting, please check
+# https://dev.twitter.com/rest/public/rate-limiting
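+# (The download loop below automatically waits whenever the remaining
+# request quota for the current rate-limit window is exhausted.)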
+
import sys
import csv
import json
@@ -41,27 +34,37 @@
import twitter
except ImportError:
print("""\
-You need to install python-twitter.
-On Linux:
- sudo pip install python-twitter
-On Windows:
- Follow instructions at http://code.google.com/p/python-twitter/ """)
+You need to install the twitter module, e.g. via:
+  pip install twitter
+If pip is not found, you might have to install it using easy_install.
+If that does not work on your system, you might want to follow the instructions
+at https://github.com/sixohsix/twitter, most likely:
+ $ git clone https://github.com/sixohsix/twitter
+ $ cd twitter
+ $ sudo python setup.py install
+""")
sys.exit(1)
from twitterauth import CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN_KEY, ACCESS_TOKEN_SECRET
-api = twitter.Api(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET,
- access_token_key=ACCESS_TOKEN_KEY, access_token_secret=ACCESS_TOKEN_SECRET)
+api = twitter.Twitter(auth=twitter.OAuth(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET,
+ token=ACCESS_TOKEN_KEY, token_secret=ACCESS_TOKEN_SECRET))
+DATA_PATH = "data"
-def get_user_params(data_path):
+# For various reasons tweets can disappear from Twitter. In these files we
+# collect the IDs of tweets that are missing or no longer visible:
+MISSING_ID_FILE = os.path.join(DATA_PATH, "missing.tsv")
+NOT_AUTHORIZED_ID_FILE = os.path.join(DATA_PATH, "not_authorized.tsv")
+
+
+def get_user_params(DATA_PATH):
user_params = {}
# get user input params
- user_params['inList'] = os.path.join(data_path, 'corpus.csv')
- user_params['outList'] = os.path.join(data_path, 'full-corpus.csv')
- user_params['rawDir'] = os.path.join(data_path, 'rawdata/')
+ user_params['inList'] = os.path.join(DATA_PATH, 'corpus.csv')
+ user_params['outList'] = os.path.join(DATA_PATH, 'full-corpus.csv')
+ user_params['rawDir'] = os.path.join(DATA_PATH, 'rawdata/')
# apply defaults
if user_params['inList'] == '':
@@ -77,21 +80,38 @@ def get_user_params(data_path):
def dump_user_params(user_params):
# dump user params for confirmation
- print 'Input: ' + user_params['inList']
- print 'Output: ' + user_params['outList']
- print 'Raw data: ' + user_params['rawDir']
- return
+ print('Input: ' + user_params['inList'])
+ print('Output: ' + user_params['outList'])
+ print('Raw data: ' + user_params['rawDir'])
def read_total_list(in_filename):
# read total fetch list csv
- fp = open(in_filename, 'rb')
+ fp = open(in_filename, 'rt')
reader = csv.reader(fp, delimiter=',', quotechar='"')
+ if os.path.exists(MISSING_ID_FILE):
+ missing_ids = [line.strip()
+ for line in open(MISSING_ID_FILE, "r").readlines()]
+ else:
+ missing_ids = []
+
+ if os.path.exists(NOT_AUTHORIZED_ID_FILE):
+ not_authed_ids = [line.strip()
+ for line in open(NOT_AUTHORIZED_ID_FILE, "r").readlines()]
+ else:
+ not_authed_ids = []
+
+    print("We will skip %i tweets that are no longer available or visible on Twitter" % (
+ len(missing_ids) + len(not_authed_ids)))
+
+ ignore_ids = set(missing_ids + not_authed_ids)
total_list = []
+
for row in reader:
- total_list.append(row)
+ if row[2] not in ignore_ids:
+ total_list.append(row)
return total_list
@@ -100,36 +120,28 @@ def purge_already_fetched(fetch_list, raw_dir):
# list of tweet ids that still need downloading
rem_list = []
+ count_done = 0
# check each tweet to see if we have it
for item in fetch_list:
# check if json file exists
- tweet_file = raw_dir + item[2] + '.json'
+ tweet_file = os.path.join(raw_dir, item[2] + '.json')
if os.path.exists(tweet_file):
# attempt to parse json file
try:
parse_tweet_json(tweet_file)
- print '--> already downloaded #' + item[2]
+ count_done += 1
except RuntimeError:
+ print("Error parsing", item)
rem_list.append(item)
else:
rem_list.append(item)
- return rem_list
-
-
-def get_time_left_str(cur_idx, fetch_list, download_pause):
-
- tweets_left = len(fetch_list) - cur_idx
- total_seconds = tweets_left * download_pause
-
- str_hr = int(total_seconds / 3600)
- str_min = int((total_seconds - str_hr * 3600) / 60)
- str_sec = total_seconds - str_hr * 3600 - str_min * 60
+ print("We have already downloaded %i tweets." % count_done)
- return '%dh %dm %ds' % (str_hr, str_min, str_sec)
+ return rem_list
def download_tweets(fetch_list, raw_dir):
@@ -138,35 +150,61 @@ def download_tweets(fetch_list, raw_dir):
if not os.path.exists(raw_dir):
os.mkdir(raw_dir)
- # stay within rate limits
- max_tweets_per_hr = 125
- download_pause_sec = 3600 / max_tweets_per_hr
-
# download tweets
for idx in range(0, len(fetch_list)):
# current item
item = fetch_list[idx]
-
- # print status
- trem = get_time_left_str(idx, fetch_list, download_pause_sec)
- print '--> downloading tweet #%s (%d of %d) (%s left)' % \
- (item[2], idx + 1, len(fetch_list), trem)
-
- # Old Twitter API 1.0
- # pull data
- # url = 'https://api.twitter.com/1/statuses/show.json?id=' + item[2]
- # print url
- # urllib.urlretrieve(url, raw_dir + item[2] + '.json')
-
- # New Twitter API 1.1
- json_data = api.GetStatus(item[2]).AsJsonString()
- with open(raw_dir + item[2] + '.json', "w") as f:
- f.write(json_data + "\n")
-
- # stay in Twitter API rate limits
- print ' pausing %d sec to obey Twitter API rate limits' % \
- (download_pause_sec)
- time.sleep(download_pause_sec)
+
+ print('--> downloading tweet #%s (%d of %d)' %
+ (item[2], idx + 1, len(fetch_list)))
+
+ try:
+ response = api.statuses.show(_id=item[2])
+
+ if response.rate_limit_remaining <= 0:
+ wait_seconds = response.rate_limit_reset - time.time()
+                print("Rate limiting requires us to wait %f seconds" %
+ wait_seconds)
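+                # sleep a bit past the reset time as a safety margin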
+ time.sleep(wait_seconds+5)
+
+ except twitter.TwitterError as e:
+ fatal = True
+ print(e)
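+            # Twitter API error codes handled below: 34 = tweet deleted/missing,
+            # 63 = user suspended, 88 = rate limit exceeded, 179 = not authorized.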
+ for m in json.loads(e.response_data.decode())['errors']:
+ if m['code'] == 34:
+ print("Tweet missing: ", item)
+ with open(MISSING_ID_FILE, "at") as f:
+ f.write(item[2] + "\n")
+
+ fatal = False
+ break
+ elif m['code'] == 63:
+ print("User of tweet '%s' has been suspended." % item)
+ with open(MISSING_ID_FILE, "at") as f:
+ f.write(item[2] + "\n")
+
+ fatal = False
+ break
+ elif m['code'] == 88:
+ print("Rate limit exceeded.")
+ fatal = True
+ break
+ elif m['code'] == 179:
+ print("Not authorized to view this tweet.")
+ with open(NOT_AUTHORIZED_ID_FILE, "at") as f:
+ f.write(item[2] + "\n")
+ fatal = False
+ break
+
+ if fatal:
+ raise
+ else:
+ continue
+
+        with open(os.path.join(raw_dir, item[2] + '.json'), "wt") as f:
+ f.write(json.dumps(dict(response)) + "\n")
return
@@ -174,13 +212,13 @@ def download_tweets(fetch_list, raw_dir):
def parse_tweet_json(filename):
# read tweet
- print 'opening: ' + filename
- fp = open(filename, 'rb')
+ fp = open(filename, 'r')
# parse json
try:
tweet_json = json.load(fp)
- except ValueError:
+ except ValueError as e:
+ print(e)
raise RuntimeError('error parsing json')
# look for twitter api error msgs
@@ -222,41 +260,49 @@ def build_output_corpus(out_filename, raw_dir, total_list):
writer.writerow(full_row)
except RuntimeError:
- print '--> bad data in tweet #' + item[2]
+ print('--> bad data in tweet #' + item[2])
missing_count += 1
else:
- print '--> missing tweet #' + item[2]
+ print('--> missing tweet #' + item[2])
missing_count += 1
# indicate success
if missing_count == 0:
- print '\nSuccessfully downloaded corpus!'
- print 'Output in: ' + out_filename + '\n'
+ print('\nSuccessfully downloaded corpus!')
+ print('Output in: ' + out_filename + '\n')
else:
- print '\nMissing %d of %d tweets!' % (missing_count, len(total_list))
- print 'Partial output in: ' + out_filename + '\n'
+ print('\nMissing %d of %d tweets!' % (missing_count, len(total_list)))
+ print('Partial output in: ' + out_filename + '\n')
return
-def main(data_path):
-
+def main():
# get user parameters
- user_params = get_user_params(data_path)
+ user_params = get_user_params(DATA_PATH)
+ print(user_params)
dump_user_params(user_params)
# get fetch list
total_list = read_total_list(user_params['inList'])
- fetch_list = purge_already_fetched(total_list, user_params['rawDir'])
-
- # start fetching data from twitter
- download_tweets(fetch_list, user_params['rawDir'])
- # second pass for any failed downloads
- print '\nStarting second pass to retry any failed downloads'
+ # remove already fetched or missing tweets
fetch_list = purge_already_fetched(total_list, user_params['rawDir'])
- download_tweets(fetch_list, user_params['rawDir'])
+ print("Fetching %i tweets..." % len(fetch_list))
+
+ if fetch_list:
+ # start fetching data from twitter
+ download_tweets(fetch_list, user_params['rawDir'])
+
+ # second pass for any failed downloads
+ fetch_list = purge_already_fetched(total_list, user_params['rawDir'])
+ if fetch_list:
+ print('\nStarting second pass to retry %i failed downloads...' %
+ len(fetch_list))
+ download_tweets(fetch_list, user_params['rawDir'])
+ else:
+ print("Nothing to fetch any more.")
# build output corpus
build_output_corpus(user_params['outList'], user_params['rawDir'],
@@ -264,4 +310,4 @@ def main(data_path):
if __name__ == '__main__':
- main("data")
+ main()
diff --git a/ch06/utils.py b/ch06/utils.py
index b60f5ad3..6757354f 100644
--- a/ch06/utils.py
+++ b/ch06/utils.py
@@ -6,6 +6,7 @@
# It is made available under the MIT License
import os
+import sys
import collections
import csv
import json
@@ -54,9 +55,13 @@ def load_sanders_data(dirname=".", line_count=-1):
tweet_fn = os.path.join(
DATA_DIR, dirname, 'rawdata', '%s.json' % tweet_id)
- tweet = json.load(open(tweet_fn, "r"))
- if 'text' in tweet and tweet['user']['lang'] == "en":
+ try:
+ tweet = json.load(open(tweet_fn, "r"))
+ except IOError:
+                print("Tweet '%s' not found. Skipping." % tweet_fn)
+ continue
+ if 'text' in tweet and tweet['user']['lang'] == "en":
topics.append(topic)
labels.append(label)
tweets.append(tweet['text'])
@@ -80,14 +85,14 @@ def plot_pr(auc_score, name, phase, precision, recall, label=None):
pylab.title('P/R curve (AUC=%0.2f) / %s' % (auc_score, label))
filename = name.replace(" ", "_")
pylab.savefig(os.path.join(CHART_DIR, "pr_%s_%s.png" %
- (filename, phase)), bbox_inches="tight")
+ (filename, phase)), bbox_inches="tight")
def show_most_informative_features(vectorizer, clf, n=20):
c_f = sorted(zip(clf.coef_[0], vectorizer.get_feature_names()))
- top = zip(c_f[:n], c_f[:-(n + 1):-1])
+ top = list(zip(c_f[:n], c_f[:-(n + 1):-1]))
for (c1, f1), (c2, f2) in top:
- print "\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2)
+ print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2))
def plot_log():
@@ -115,7 +120,7 @@ def plot_feat_importance(feature_names, clf, name):
inds = np.argsort(coef)
f_imp = f_imp[inds]
coef = coef[inds]
- xpos = np.array(range(len(coef)))
+ xpos = np.array(list(range(len(coef))))
pylab.bar(xpos, coef, width=1)
pylab.title('Feature importance for %s' % (name))
@@ -177,8 +182,13 @@ def plot_bias_variance(data_sizes, train_errors, test_errors, name):
def load_sent_word_net():
sent_scores = collections.defaultdict(list)
+ sentiwordnet_path = os.path.join(DATA_DIR, "SentiWordNet_3.0.0_20130122.txt")
+
+ if not os.path.exists(sentiwordnet_path):
+ print("Please download SentiWordNet_3.0.0 from http://sentiwordnet.isti.cnr.it/download.php, extract it and put it into the data directory")
+ sys.exit(1)
- with open(os.path.join(DATA_DIR, "SentiWordNet_3.0.0_20130122.txt"), "r") as csvfile:
+ with open(sentiwordnet_path, 'r') as csvfile:
reader = csv.reader(csvfile, delimiter='\t', quotechar='"')
for line in reader:
if line[0].startswith("#"):
@@ -196,7 +206,7 @@ def load_sent_word_net():
term = term.replace("-", " ").replace("_", " ")
key = "%s/%s" % (POS, term.split("#")[0])
sent_scores[key].append((float(PosScore), float(NegScore)))
- for key, value in sent_scores.iteritems():
+ for key, value in sent_scores.items():
sent_scores[key] = np.mean(value, axis=0)
return sent_scores
diff --git a/ch07/.gitignore b/ch07/.gitignore
index d21bb3d1..e33609d2 100644
--- a/ch07/.gitignore
+++ b/ch07/.gitignore
@@ -1 +1 @@
-.formula_*/
+*.png
diff --git a/ch07/README.rst b/ch07/README.rst
new file mode 100644
index 00000000..12a7b051
--- /dev/null
+++ b/ch07/README.rst
@@ -0,0 +1,42 @@
+=========
+Chapter 7
+=========
+
+Support code for *Chapter 7: Regression*
+
+
+Boston data analysis
+--------------------
+
+This dataset is shipped with sklearn. Thus, no extra download is required.
+
+
+boston1.py
+ Fit a linear regression model to the Boston house price data
+boston1numpy.py
+    Version of the above script using NumPy operations for linear regression
+boston_cv_penalized.py
+ Test different penalized (and OLS) regression schemes on the Boston dataset
+figure1_2.py
+ Show the regression line for Boston data
+figure3.py
+ Show the regression line for Boston data with OLS and Lasso
+figure4.py
+ Scatter plot of predicted-vs-actual for multidimensional regression
+
+10K data analysis
+-----------------
+
+lr10k.py
+ Linear regression on 10K dataset, evaluation by cross-validation
+predict10k_en.py
+    Elastic nets (including a variant with inner cross-validation for
+    setting the parameters). Produces a scatter plot.
+
+
+MovieLens data analysis
+-----------------------
+
+In this chapter, we only consider a very simple approach, which is implemented
+in the ``usermodel.py`` script.
+
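+Example usage
+-------------
+
+The scripts can be run directly with Python; for instance (assuming
+scikit-learn and matplotlib are installed)::
+
+    python boston1.py
+    python figure1_2.py
+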
diff --git a/ch07/boston1.py b/ch07/boston1.py
index 8c8ebe84..d0b30447 100644
--- a/ch07/boston1.py
+++ b/ch07/boston1.py
@@ -5,20 +5,35 @@
#
# It is made available under the MIT License
+# This script shows an example of simple (ordinary) linear regression
+
+# The first edition of the book used only NumPy functions for this operation. See
+# the file boston1numpy.py for that version.
+
import numpy as np
from sklearn.datasets import load_boston
-import pylab as plt
+from sklearn.linear_model import LinearRegression
+from matplotlib import pyplot as plt
boston = load_boston()
-x = np.array([np.concatenate((v, [1])) for v in boston.data])
+x = boston.data
y = boston.target
-s, total_error, _, _ = np.linalg.lstsq(x, y)
-rmse = np.sqrt(total_error[0] / len(x))
-print('Residual: {}'.format(rmse))
+# Fitting a model is trivial: call the ``fit`` method in LinearRegression:
+lr = LinearRegression()
+lr.fit(x, y)
+
+# The instance member `residues_` contains the sum of the squared residuals
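+# (if `residues_` is unavailable in your version of sklearn, the same value
+# can be computed as np.sum((y - lr.predict(x))**2))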
+rmse = np.sqrt(lr.residues_/len(x))
+print('RMSE: {}'.format(rmse))
+
+fig, ax = plt.subplots()
+# Plot a diagonal (for reference):
+ax.plot([0, 50], [0, 50], '-', color=(.9,.3,.3), lw=4)
+
+# Plot the prediction versus real:
+ax.scatter(lr.predict(x), boston.target)
-plt.plot(np.dot(x, s), boston.target, 'ro')
-plt.plot([0, 50], [0, 50], 'g-')
-plt.xlabel('predicted')
-plt.ylabel('real')
-plt.show()
+ax.set_xlabel('predicted')
+ax.set_ylabel('real')
+fig.savefig('Figure_07_08.png')
diff --git a/ch07/boston1numpy.py b/ch07/boston1numpy.py
new file mode 100644
index 00000000..0074f927
--- /dev/null
+++ b/ch07/boston1numpy.py
@@ -0,0 +1,31 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+# This script shows an example of simple (ordinary) linear regression
+
+import numpy as np
+from sklearn.datasets import load_boston
+import pylab as plt
+
+boston = load_boston()
+x = np.array([np.concatenate((v, [1])) for v in boston.data])
+y = boston.target
+
+# np.linalg.lstsq implements least-squares linear regression
+s, total_error, _, _ = np.linalg.lstsq(x, y)
+
+rmse = np.sqrt(total_error[0] / len(x))
+print('Residual: {}'.format(rmse))
+
+# Plot the prediction versus real:
+plt.plot(np.dot(x, s), boston.target, 'ro')
+
+# Plot a diagonal (for reference):
+plt.plot([0, 50], [0, 50], 'g-')
+plt.xlabel('predicted')
+plt.ylabel('real')
+plt.show()
diff --git a/ch07/boston_cv10_penalized.py b/ch07/boston_cv10_penalized.py
deleted file mode 100644
index cc4abd30..00000000
--- a/ch07/boston_cv10_penalized.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from __future__ import print_function
-from sklearn.cross_validation import KFold
-from sklearn.linear_model import ElasticNet, Lasso, Ridge
-from sklearn.linear_model import ElasticNetCV, LassoCV, RidgeCV
-import numpy as np
-from sklearn.datasets import load_boston
-boston = load_boston()
-x = np.array([np.concatenate((v, [1])) for v in boston.data])
-y = boston.target
-
-for name, met in [
- ('elastic-net(.5)', ElasticNet(fit_intercept=True, alpha=0.5)),
- ('lasso(.5)', Lasso(fit_intercept=True, alpha=0.5)),
- ('ridge(.5)', Ridge(fit_intercept=True, alpha=0.5)),
-]:
- met.fit(x, y)
- p = np.array([met.predict(xi) for xi in x])
- e = p - y
- total_error = np.dot(e, e)
- rmse_train = np.sqrt(total_error / len(p))
-
- kf = KFold(len(x), n_folds=10)
- err = 0
- for train, test in kf:
- met.fit(x[train], y[train])
- p = np.array([met.predict(xi) for xi in x[test]])
- e = p - y[test]
- err += np.dot(e, e)
-
- rmse_10cv = np.sqrt(err / len(x))
- print('Method: {}'.format(name))
- print('RMSE on training: {}'.format(rmse_train))
- print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
- print()
- print()
diff --git a/ch07/boston_cv_penalized.py b/ch07/boston_cv_penalized.py
new file mode 100644
index 00000000..c894c4fa
--- /dev/null
+++ b/ch07/boston_cv_penalized.py
@@ -0,0 +1,46 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+# This script fits several forms of penalized regression
+
+from __future__ import print_function
+import numpy as np
+from sklearn.cross_validation import KFold
+from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge
+from sklearn.metrics import r2_score
+from sklearn.datasets import load_boston
+boston = load_boston()
+x = boston.data
+y = boston.target
+
+for name, met in [
+ ('linear regression', LinearRegression()),
+ ('lasso()', Lasso()),
+ ('elastic-net(.5)', ElasticNet(alpha=0.5)),
+ ('lasso(.5)', Lasso(alpha=0.5)),
+ ('ridge(.5)', Ridge(alpha=0.5)),
+]:
+ # Fit on the whole data:
+ met.fit(x, y)
+
+ # Predict on the whole data:
+ p = met.predict(x)
+ r2_train = r2_score(y, p)
+
+    # Now, we use 5-fold cross-validation to estimate generalization error
+ kf = KFold(len(x), n_folds=5)
+ p = np.zeros_like(y)
+ for train, test in kf:
+ met.fit(x[train], y[train])
+ p[test] = met.predict(x[test])
+
+ r2_cv = r2_score(y, p)
+ print('Method: {}'.format(name))
+ print('R2 on training: {}'.format(r2_train))
+ print('R2 on 5-fold CV: {}'.format(r2_cv))
+ print()
+ print()
diff --git a/ch07/cv10_lr.py b/ch07/cv10_lr.py
deleted file mode 100644
index 2ae52e02..00000000
--- a/ch07/cv10_lr.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from sklearn.cross_validation import KFold
-from sklearn.linear_model import LinearRegression, ElasticNet
-import numpy as np
-from sklearn.datasets import load_boston
-boston = load_boston()
-x = np.array([np.concatenate((v, [1])) for v in boston.data])
-y = boston.target
-FIT_EN = False
-
-if FIT_EN:
- model = ElasticNet(fit_intercept=True, alpha=0.5)
-else:
- model = LinearRegression(fit_intercept=True)
-model.fit(x, y)
-p = np.array([model.predict(xi) for xi in x])
-e = p - y
-total_error = np.dot(e, e)
-rmse_train = np.sqrt(total_error / len(p))
-
-kf = KFold(len(x), n_folds=10)
-err = 0
-for train, test in kf:
- model.fit(x[train], y[train])
- p = np.array([model.predict(xi) for xi in x[test]])
- e = p - y[test]
- err += np.dot(e, e)
-
-rmse_10cv = np.sqrt(err / len(x))
-print('RMSE on training: {}'.format(rmse_train))
-print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
diff --git a/ch07/data/.gitignore b/ch07/data/.gitignore
new file mode 100644
index 00000000..3286ba0f
--- /dev/null
+++ b/ch07/data/.gitignore
@@ -0,0 +1 @@
+E2006.train
diff --git a/ch07/data/download.sh b/ch07/data/download.sh
old mode 100644
new mode 100755
index 6f938e06..74753364
--- a/ch07/data/download.sh
+++ b/ch07/data/download.sh
@@ -1,5 +1,3 @@
#!/usr/bin/env bash
-wget http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.train.bz2
+curl -O http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/E2006.train.bz2
bunzip2 E2006.train.bz2
-wget http://www.grouplens.org/system/files/ml-100k.zip
-unzip ml-100k.zip
diff --git a/ch07/figure1.py b/ch07/figure1.py
deleted file mode 100644
index a9c23af7..00000000
--- a/ch07/figure1.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import numpy as np
-from sklearn.datasets import load_boston
-import pylab as plt
-from mpltools import style
-style.use('ggplot')
-
-boston = load_boston()
-plt.scatter(boston.data[:, 5], boston.target)
-plt.xlabel("RM")
-plt.ylabel("House Price")
-
-
-x = boston.data[:, 5]
-x = np.array([[v] for v in x])
-y = boston.target
-
-slope, res, _, _ = np.linalg.lstsq(x, y)
-plt.plot([0, boston.data[:, 5].max() + 1],
- [0, slope * (boston.data[:, 5].max() + 1)], '-', lw=4)
-plt.savefig('Figure1.png', dpi=150)
-
-rmse = np.sqrt(res[0] / len(x))
-print('Residual: {}'.format(rmse))
diff --git a/ch07/figure1_2.py b/ch07/figure1_2.py
new file mode 100644
index 00000000..3f11a0c7
--- /dev/null
+++ b/ch07/figure1_2.py
@@ -0,0 +1,63 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import numpy as np
+from sklearn.datasets import load_boston
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+from matplotlib import pyplot as plt
+
+boston = load_boston()
+
+# The feature at index number five is the average number of rooms (RM)
+fig,ax = plt.subplots()
+ax.scatter(boston.data[:, 5], boston.target)
+ax.set_xlabel("Average number of rooms (RM)")
+ax.set_ylabel("House Price")
+
+x = boston.data[:, 5]
+# fit (used below) takes a two-dimensional array as input. We use np.atleast_2d
+# to convert from one to two dimensional, then transpose to make sure that the
+# format matches:
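+# (for example, np.transpose(np.atleast_2d([1., 2., 3.])) has shape (3, 1))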
+x = np.transpose(np.atleast_2d(x))
+
+y = boston.target
+
+lr = LinearRegression(fit_intercept=False)
+lr.fit(x, y)
+
+ax.plot([0, boston.data[:, 5].max() + 1],
+        [0, lr.predict([[boston.data[:, 5].max() + 1]])[0]], '-', lw=4)
+fig.savefig('Figure1.png')
+
+mse = mean_squared_error(y, lr.predict(x))
+rmse = np.sqrt(mse)
+print('RMSE (no intercept): {}'.format(rmse))
+
+# Repeat, but fitting an intercept this time:
+lr = LinearRegression(fit_intercept=True)
+
+lr.fit(x, y)
+
+fig,ax = plt.subplots()
+ax.set_xlabel("Average number of rooms (RM)")
+ax.set_ylabel("House Price")
+ax.scatter(boston.data[:, 5], boston.target)
+xmin = x.min()
+xmax = x.max()
+ax.plot([xmin, xmax], lr.predict([[xmin], [xmax]]), '-', lw=4)
+fig.savefig('Figure2.png')
+
+mse = mean_squared_error(y, lr.predict(x))
+print("Mean squared error (of training data): {:.3}".format(mse))
+
+rmse = np.sqrt(mse)
+print("Root mean squared error (of training data): {:.3}".format(rmse))
+
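+# COD stands for the coefficient of determination, which is the R2 score: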
+cod = r2_score(y, lr.predict(x))
+print('COD (on training data): {:.2}'.format(cod))
+
diff --git a/ch07/figure2.py b/ch07/figure2.py
deleted file mode 100644
index c5977207..00000000
--- a/ch07/figure2.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import numpy as np
-from sklearn.datasets import load_boston
-import pylab as plt
-from mpltools import style
-style.use('ggplot')
-
-boston = load_boston()
-plt.scatter(boston.data[:, 5], boston.target)
-plt.xlabel("RM")
-plt.ylabel("House Price")
-
-
-x = boston.data[:, 5]
-xmin = x.min()
-xmax = x.max()
-x = np.array([[v, 1] for v in x])
-y = boston.target
-
-(slope, bias), res, _, _ = np.linalg.lstsq(x, y)
-plt.plot([xmin, xmax], [slope * xmin + bias, slope * xmax + bias], '-', lw=4)
-plt.savefig('Figure2.png', dpi=150)
-
-rmse = np.sqrt(res[0] / len(x))
-print('Residual: {}'.format(rmse))
diff --git a/ch07/figure3.py b/ch07/figure3.py
new file mode 100644
index 00000000..7543c1ec
--- /dev/null
+++ b/ch07/figure3.py
@@ -0,0 +1,33 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from sklearn.linear_model import LinearRegression, Lasso
+import numpy as np
+from sklearn.datasets import load_boston
+from matplotlib import pyplot as plt
+
+boston = load_boston()
+fig, ax = plt.subplots()
+ax.scatter(boston.data[:, 5], boston.target)
+ax.set_xlabel("Number of rooms (RM)")
+ax.set_ylabel("House Price")
+
+
+x = boston.data[:, 5]
+xmin = x.min()
+xmax = x.max()
+x = np.transpose(np.atleast_2d(x))
+y = boston.target
+
+lr = LinearRegression()
+lr.fit(x, y)
+ax.plot([xmin, xmax], lr.predict([[xmin], [xmax]]), ':', lw=4, label='OLS model')
+
+las = Lasso()
+las.fit(x, y)
+ax.plot([xmin, xmax], las.predict([[xmin], [xmax]]), '-', lw=4, label='Lasso model')
+ax.legend(loc='best')
+fig.savefig('Figure3.png')
diff --git a/ch07/figure4.py b/ch07/figure4.py
index ab71b3a8..a24d48be 100644
--- a/ch07/figure4.py
+++ b/ch07/figure4.py
@@ -5,31 +5,29 @@
#
# It is made available under the MIT License
-from sklearn.linear_model import Lasso
+
+# This script plots prediction-vs-actual on training set for the Boston dataset
+# using OLS regression
import numpy as np
+from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_boston
-import pylab as plt
-from mpltools import style
-style.use('ggplot')
+from sklearn.metrics import mean_squared_error
+from matplotlib import pyplot as plt
boston = load_boston()
-plt.scatter(boston.data[:, 5], boston.target)
-plt.xlabel("RM")
-plt.ylabel("House Price")
-
-x = boston.data[:, 5]
-xmin = x.min()
-xmax = x.max()
-x = np.array([[v, 1] for v in x])
+x = boston.data
y = boston.target
-(slope, bias), res, _, _ = np.linalg.lstsq(x, y)
-plt.plot([xmin, xmax], [slope * xmin + bias, slope * xmax + bias], ':', lw=4)
+lr = LinearRegression()
+lr.fit(x, y)
+p = lr.predict(x)
+print("RMSE: {:.2}.".format(np.sqrt(mean_squared_error(y, p))))
+print("R2: {:.2}.".format(lr.score(x, y)))
+fig,ax = plt.subplots()
+ax.scatter(p, y)
+ax.set_xlabel('Predicted price')
+ax.set_ylabel('Actual price')
+ax.plot([y.min(), y.max()], [y.min(), y.max()], lw=4)
-las = Lasso()
-las.fit(x, y)
-y0 = las.predict([xmin, 1])
-y1 = las.predict([xmax, 1])
-plt.plot([xmin, xmax], [y0, y1], '-', lw=4)
-plt.savefig('Figure3.png', dpi=150)
+fig.savefig('Figure4.png')
diff --git a/ch07/lasso_path_plot.py b/ch07/lasso_path_plot.py
new file mode 100644
index 00000000..eab64c26
--- /dev/null
+++ b/ch07/lasso_path_plot.py
@@ -0,0 +1,29 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from sklearn.linear_model import Lasso
+from sklearn.datasets import load_boston
+from matplotlib import pyplot as plt
+import numpy as np
+
+boston = load_boston()
+x = boston.data
+y = boston.target
+
+las = Lasso(normalize=True)
+alphas = np.logspace(-5, 2, 1000)
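+# Lasso.path computes the coefficients along the whole regularization path: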
+alphas, coefs, _ = las.path(x, y, alphas=alphas)
+
+fig,ax = plt.subplots()
+ax.plot(alphas, coefs.T)
+ax.set_xscale('log')
+ax.set_xlim(alphas.max(), alphas.min())
+ax.set_title('Lasso coefficient path as a function of alpha')
+ax.set_xlabel('Alpha')
+ax.set_ylabel('Coefficient weight')
+fig.savefig('Figure_LassoPath.png')
+
diff --git a/ch07/lr10k.py b/ch07/lr10k.py
index 71afd412..831706a1 100644
--- a/ch07/lr10k.py
+++ b/ch07/lr10k.py
@@ -6,31 +6,33 @@
# It is made available under the MIT License
import numpy as np
+from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_svmlight_file
-from sklearn.linear_model import ElasticNet, LinearRegression
-data, target = load_svmlight_file('E2006.train')
-lr = LinearRegression(fit_intercept=True)
-
+from sklearn.linear_model import LinearRegression
from sklearn.cross_validation import KFold
-kf = KFold(len(target), n_folds=10)
-err = 0
-for train, test in kf:
- lr.fit(data[train], target[train])
- p = map(lr.predict, data[test])
- p = np.array(p).ravel()
- e = p - target[test]
- err += np.dot(e, e)
-rmse_10cv = np.sqrt(err / len(target))
+
+# Load data:
+data, target = load_svmlight_file('data/E2006.train')
+lr = LinearRegression()
+
+# Compute error on training data to demonstrate that we can obtain near perfect
+# scores:
lr.fit(data, target)
-p = np.array(map(lr.predict, data))
-p = p.ravel()
-e = p - target
-total_error = np.dot(e, e)
-rmse_train = np.sqrt(total_error / len(p))
+pred = lr.predict(data)
+
+print('RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('R2 on training, {:.2}'.format(r2_score(target, pred)))
+print('')
+pred = np.zeros_like(target)
+kf = KFold(len(target), n_folds=5)
+for train, test in kf:
+ lr.fit(data[train], target[train])
+ pred[test] = lr.predict(data[test])
-print('RMSE on training: {}'.format(rmse_train))
-print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
+print('RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
diff --git a/ch07/predict10k_en.py b/ch07/predict10k_en.py
index d0f1456b..a7dd960a 100644
--- a/ch07/predict10k_en.py
+++ b/ch07/predict10k_en.py
@@ -8,33 +8,66 @@
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.cross_validation import KFold
-from sklearn.linear_model import ElasticNet, LinearRegression
+from sklearn.linear_model import ElasticNetCV, ElasticNet
+from sklearn.metrics import mean_squared_error, r2_score
+from matplotlib import pyplot as plt
data, target = load_svmlight_file('data/E2006.train')
-lr = LinearRegression(fit_intercept=True)
-en = ElasticNet(fit_intercept=True, alpha=.1)
-met = en
+# Edit the lines below if you want to switch method:
+# from sklearn.linear_model import Lasso
+# met = Lasso(alpha=0.1)
+met = ElasticNet(alpha=0.1)
-kf = KFold(len(target), n_folds=10)
-err = 0
+kf = KFold(len(target), n_folds=5)
+pred = np.zeros_like(target)
for train, test in kf:
met.fit(data[train], target[train])
- p = map(met.predict, data[test])
- p = np.array(p).ravel()
- e = p - target[test]
- err += np.dot(e, e)
+ pred[test] = met.predict(data[test])
-rmse_10cv = np.sqrt(err / len(target))
+print('[EN 0.1] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('[EN 0.1] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
+print('')
+# Construct an ElasticNetCV object (use all available CPUs)
+met = ElasticNetCV(n_jobs=-1)
+
+kf = KFold(len(target), n_folds=5)
+pred = np.zeros_like(target)
+for train, test in kf:
+ met.fit(data[train], target[train])
+ pred[test] = met.predict(data[test])
+
+print('[EN CV] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('[EN CV] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
+print('')
met.fit(data, target)
-p = np.array(map(met.predict, data))
-p = p.ravel()
-e = p - target
-total_error = np.dot(e, e)
-rmse_train = np.sqrt(total_error / len(p))
+pred = met.predict(data)
+print('[EN CV] RMSE on training, {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('[EN CV] R2 on training, {:.2}'.format(r2_score(target, pred)))
+
+
+# Construct an ElasticNetCV object (use all available CPUs)
+met = ElasticNetCV(n_jobs=-1, l1_ratio=[.01, .05, .25, .5, .75, .95, .99])
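+# (l1_ratio=1 would be a pure Lasso penalty, while values close to 0 approach
+# Ridge; passing a list lets the internal cross-validation pick the best mix.)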
+
+kf = KFold(len(target), n_folds=5)
+pred = np.zeros_like(target)
+for train, test in kf:
+ met.fit(data[train], target[train])
+ pred[test] = met.predict(data[test])
+
+
+print('[EN CV l1_ratio] RMSE on testing (5 fold), {:.2}'.format(np.sqrt(mean_squared_error(target, pred))))
+print('[EN CV l1_ratio] R2 on testing (5 fold), {:.2}'.format(r2_score(target, pred)))
+print('')
+
+fig, ax = plt.subplots()
+y = target
+ax.scatter(y, pred, c='k')
+ax.plot([-5,-1], [-5,-1], 'r-', lw=2)
+ax.set_xlabel('Actual value')
+ax.set_ylabel('Predicted value')
+fig.savefig('Figure_10k_scatter_EN_l1_ratio.png')
-print('RMSE on training: {}'.format(rmse_train))
-print('RMSE on 10-fold CV: {}'.format(rmse_10cv))
diff --git a/ch07/usermodel.py b/ch07/usermodel.py
deleted file mode 100644
index bb8c2ce1..00000000
--- a/ch07/usermodel.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import numpy as np
-from scipy import sparse
-from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV
-from sklearn.cross_validation import KFold
-
-data = np.array([[int(tok) for tok in line.split('\t')[:3]]
- for line in open('data/ml-100k/u.data')])
-ij = data[:, :2]
-ij -= 1 # original data is in 1-based system
-values = data[:, 2]
-reviews = sparse.csc_matrix((values, ij.T)).astype(float)
-
-reg = ElasticNetCV(fit_intercept=True, alphas=[
- 0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
-
-
-def movie_norm(xc):
- xc = xc.copy().toarray()
- x1 = np.array([xi[xi > 0].mean() for xi in xc])
- x1 = np.nan_to_num(x1)
-
- for i in range(xc.shape[0]):
- xc[i] -= (xc[i] > 0) * x1[i]
- return xc, x1
-
-
-def learn_for(i):
- u = reviews[i]
- us = np.delete(np.arange(reviews.shape[0]), i)
- ps, = np.where(u.toarray().ravel() > 0)
- x = reviews[us][:, ps].T
- y = u.data
- err = 0
- eb = 0
- kf = KFold(len(y), n_folds=4)
- for train, test in kf:
- xc, x1 = movie_norm(x[train])
- reg.fit(xc, y[train] - x1)
-
- xc, x1 = movie_norm(x[test])
- p = np.array([reg.predict(xi) for xi in xc]).ravel()
- e = (p + x1) - y[test]
- err += np.sum(e * e)
- eb += np.sum((y[train].mean() - y[test]) ** 2)
- return np.sqrt(err / float(len(y))), np.sqrt(eb / float(len(y)))
-
-whole_data = []
-for i in range(reviews.shape[0]):
- s = learn_for(i)
- print(s[0] < s[1])
- print(s)
- whole_data.append(s)
diff --git a/ch08/README.rst b/ch08/README.rst
new file mode 100644
index 00000000..488844e6
--- /dev/null
+++ b/ch08/README.rst
@@ -0,0 +1,41 @@
+=========
+Chapter 8
+=========
+
+Support code for *Chapter 8: Recommendations*.
+
+This code corresponds to the second edition of the book and has been
+significantly refactored compared to the first edition.
+
+Ratings Prediction
+------------------
+
+Note that since the partition of the data into training and testing is random,
+every time you run the code, the results will be different.
+
+
+load_ml100k.py
+ Load data & partition into test/train
+norm.py
+ Normalize the data
+corrneighbours.py
+    Neighbour models based on correlation
+regression.py
+ Regression models
+stacked.py
+ Stacked predictions
+averaged.py
+ Averaging of predictions (mentioned in book, but code is not shown there).
+
+Association Rule Mining
+-----------------------
+
+Check the folder ``apriori/``
+
+apriori/histogram.py
+ Print a histogram of how many times each product was bought
+apriori/apriori.py
+ Implementation of Apriori algorithm and association rule building
+apriori/apriori_example.py
+    Example of the Apriori algorithm on a retail dataset
+
diff --git a/ch08/all_correlations.py b/ch08/all_correlations.py
index df9c30bc..d3817bf0 100644
--- a/ch08/all_correlations.py
+++ b/ch08/all_correlations.py
@@ -7,10 +7,15 @@
import numpy as np
-# This is the version in the book:
-
-
-def all_correlations(bait, target):
+def all_correlations(y, X):
+ from scipy import spatial
+ y = np.atleast_2d(y)
+ sp = spatial.distance.cdist(X, y, 'correlation')
+ # The "correlation distance" is 1 - corr(x,y); so we invert that to obtain the correlation
+ return 1 - sp.ravel()
+
+# This is the version in the book (1st Edition):
+def all_correlations_book_version(bait, target):
'''
corrs = all_correlations(bait, target)
@@ -21,9 +26,7 @@ def all_correlations(bait, target):
for c in target])
# This is a faster, but harder to read, implementation:
-
-
-def all_correlations(y, X):
+def all_correlations_fast_no_scipy(y, X):
'''
Cs = all_correlations(y, X)
@@ -41,3 +44,5 @@ def all_correlations(y, X):
xs_ += 1e-5 # Handle zeros in x
return (xy - x_ * y_ * n) / n / xs_ / ys_
+
+
diff --git a/ch08/apriori/.gitignore b/ch08/apriori/.gitignore
index 05697a95..6379e20f 100644
--- a/ch08/apriori/.gitignore
+++ b/ch08/apriori/.gitignore
@@ -1 +1 @@
-retail.dat
+retail.dat.gz
diff --git a/ch08/apriori/apriori.py b/ch08/apriori/apriori.py
index 1ebb7e54..eeaf0d46 100644
--- a/ch08/apriori/apriori.py
+++ b/ch08/apriori/apriori.py
@@ -5,9 +5,12 @@
#
# It is made available under the MIT License
+from collections import namedtuple
+
+
def apriori(dataset, minsupport, maxsize):
'''
- freqsets, baskets = apriori(dataset, minsupport, maxsize)
+ freqsets, support = apriori(dataset, minsupport, maxsize)
Parameters
----------
@@ -21,48 +24,70 @@ def apriori(dataset, minsupport, maxsize):
Returns
-------
freqsets : sequence of sequences
- baskets : dictionary
+ support : dictionary
+ This associates each itemset (represented as a frozenset) with a float
+ (the support of that itemset)
'''
from collections import defaultdict
baskets = defaultdict(list)
pointers = defaultdict(list)
+
for i, ds in enumerate(dataset):
for ell in ds:
pointers[ell].append(i)
baskets[frozenset([ell])].append(i)
- pointers = dict([(k, frozenset(v)) for k, v in pointers.items()])
- baskets = dict([(k, frozenset(v)) for k, v in baskets.items()])
- valid = set(list(el)[0]
- for el, c in baskets.items() if (len(c) >= minsupport))
- dataset = [[el for el in ds if (el in valid)] for ds in dataset]
- dataset = [ds for ds in dataset if len(ds) > 1]
- dataset = map(frozenset, dataset)
+ # Convert pointer items to frozensets to speed up operations later
+ new_pointers = dict()
+ for k in pointers:
+ if len(pointers[k]) >= minsupport:
+ new_pointers[k] = frozenset(pointers[k])
+ pointers = new_pointers
+ for k in baskets:
+ baskets[k] = frozenset(baskets[k])
+
+ # Valid are all elements whose support is >= minsupport
+ valid = set()
+ for el, c in baskets.items():
+ if len(c) >= minsupport:
+ valid.update(el)
+
+ # Itemsets at first iteration are simply all singleton with valid elements:
itemsets = [frozenset([v]) for v in valid]
freqsets = []
for i in range(maxsize - 1):
- print(len(itemsets))
+ print("At iteration {}, number of frequent baskets: {}".format(
+ i, len(itemsets)))
newsets = []
- for i, ell in enumerate(itemsets):
- ccounts = baskets[ell]
- for v_, pv in pointers.items():
- if v_ not in ell:
+ for it in itemsets:
+ ccounts = baskets[it]
+
+ for v, pv in pointers.items():
+ if v not in it:
csup = (ccounts & pv)
if len(csup) >= minsupport:
- new = frozenset(ell | set([v_]))
+ new = frozenset(it | frozenset([v]))
if new not in baskets:
newsets.append(new)
baskets[new] = csup
freqsets.extend(itemsets)
itemsets = newsets
- return freqsets, baskets
+ if not len(itemsets):
+ break
+ support = {}
+ for k in baskets:
+ support[k] = float(len(baskets[k]))
+ return freqsets, support
-def association_rules(dataset, freqsets, baskets, minlift):
+# A namedtuple to collect all values that may be interesting
+AssociationRule = namedtuple('AssociationRule', ['antecendent', 'consequent', 'base', 'py_x', 'lift'])
+
+def association_rules(dataset, freqsets, support, minlift):
'''
- for (antecendent, consequent, base, py_x, lift) in association_rules(dataset, freqsets, baskets, minlift):
+ for assoc_rule in association_rules(dataset, freqsets, support, minlift):
...
This function takes the returns from ``apriori``.
@@ -72,9 +97,13 @@ def association_rules(dataset, freqsets, baskets, minlift):
dataset : sequence of sequences
input dataset
freqsets : sequence of sequences
- baskets : dictionary
+ support : dictionary
minlift : int
minimal lift of yielded rules
+
+ Returns
+ -------
+ assoc_rule : sequence of AssociationRule objects
'''
nr_transactions = float(len(dataset))
freqsets = [f for f in freqsets if len(f) > 1]
@@ -82,8 +111,9 @@ def association_rules(dataset, freqsets, baskets, minlift):
for f in fset:
consequent = frozenset([f])
antecendent = fset - consequent
- base = len(baskets[consequent]) / nr_transactions
- py_x = len(baskets[fset]) / float(len(baskets[antecendent]))
+ py_x = support[fset] / support[antecendent]
+ base = support[consequent] / nr_transactions
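+            # lift = P(consequent | antecedent) / P(consequent); a lift of 1.0
+            # means the antecedent tells us nothing about the consequent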
lift = py_x / base
if lift > minlift:
- yield (antecendent, consequent, base, py_x, lift)
+ yield AssociationRule(antecendent, consequent, base, py_x, lift)
+
diff --git a/ch08/apriori/apriori_example.py b/ch08/apriori/apriori_example.py
index da77e96c..971ed4d5 100644
--- a/ch08/apriori/apriori_example.py
+++ b/ch08/apriori/apriori_example.py
@@ -7,10 +7,17 @@
from apriori import apriori, association_rules
from gzip import GzipFile
+
+# Load dataset
dataset = [[int(tok) for tok in line.strip().split()]
for line in GzipFile('retail.dat.gz')]
-freqsets, baskets = apriori(dataset, 80, maxsize=5)
-nr_transactions = float(len(dataset))
-for ant, con, base, pyx, lift in association_rules(dataset, freqsets, baskets, 30):
- print('{} | {} | {} ({:%}) | {} | {} | {}'
- .format(ant, con, len(baskets[con]), len(baskets[con]) / nr_transactions, len(baskets[ant]), len(baskets[con | ant]), int(lift)))
+
+freqsets, support = apriori(dataset, 80, maxsize=16)
+rules = list(association_rules(dataset, freqsets, support, minlift=30.0))
+
+rules.sort(key=(lambda ar: -ar.lift))
+for ar in rules:
+ print('{} -> {} (lift = {:.4})'
+ .format(set(ar.antecendent),
+ set(ar.consequent),
+ ar.lift))
diff --git a/ch08/apriori/apriori_naive.py b/ch08/apriori/apriori_naive.py
index cb50a8e1..03ab9130 100644
--- a/ch08/apriori/apriori_naive.py
+++ b/ch08/apriori/apriori_naive.py
@@ -5,35 +5,83 @@
#
# It is made available under the MIT License
-import numpy as np
from collections import defaultdict
from itertools import chain
from gzip import GzipFile
-minsupport = 44
+minsupport = 80
dataset = [[int(tok) for tok in line.strip().split()]
for line in GzipFile('retail.dat.gz')]
-dataset = dataset[::20]
counts = defaultdict(int)
for elem in chain(*dataset):
counts[elem] += 1
+# Only elements that have at least minsupport should be considered.
valid = set(el for el, c in counts.items() if (c >= minsupport))
+
+# Filter the dataset to contain only valid elements
+# (This step is not strictly necessary, but will make the rest of the code
+# faster as the itemsets will be smaller):
dataset = [[el for el in ds if (el in valid)] for ds in dataset]
-dataset = [frozenset(ds) for ds in dataset if len(ds) > 1]
+# Convert to frozenset for fast processing
+dataset = [frozenset(ds) for ds in dataset]
itemsets = [frozenset([v]) for v in valid]
-allsets = [itemsets]
+freqsets = itemsets[:]
for i in range(16):
- print(len(itemsets))
+ print("At iteration {}, number of frequent baskets: {}".format(
+ i, len(itemsets)))
nextsets = []
- for i, ell in enumerate(itemsets):
- for v_ in valid:
- if v_ not in ell:
- c = (ell | set([v_]))
- if sum(1 for d in dataset if d.issuperset(c)) > minsupport:
+
+ tested = set()
+ for it in itemsets:
+ for v in valid:
+ if v not in it:
+ # Create a new candidate set by adding v to it
+ c = (it | frozenset([v]))
+
+ # Check if we have tested it already:
+ if c in tested:
+ continue
+ tested.add(c)
+
+ # Count support by looping over dataset
+ # This step is slow.
+ # Check `apriori.py` for a better implementation.
+ support_c = sum(1 for d in dataset if d.issuperset(c))
+ if support_c > minsupport:
nextsets.append(c)
- allsets.append(nextsets)
+ freqsets.extend(nextsets)
itemsets = nextsets
+ if not len(itemsets):
+ break
+print("Finished!")
+
+
+def rules_from_itemset(itemset, dataset, minlift=1.):
+ nr_transactions = float(len(dataset))
+ for item in itemset:
+ consequent = frozenset([item])
+ antecedent = itemset-consequent
+ base = 0.0
+ # acount: antecedent count
+ acount = 0.0
+
+ # ccount : consequent count
+ ccount = 0.0
+ for d in dataset:
+ if item in d: base += 1
+ if d.issuperset(itemset): ccount += 1
+ if d.issuperset(antecedent): acount += 1
+ base /= nr_transactions
+ p_y_given_x = ccount/acount
+ lift = p_y_given_x / base
+ if lift > minlift:
+ print('Rule {0} -> {1} has lift {2}'
+                  .format(antecedent, consequent, lift))
+
+for itemset in freqsets:
+ if len(itemset) > 1:
+ rules_from_itemset(itemset, dataset, minlift=4.)
diff --git a/ch08/averaged.py b/ch08/averaged.py
new file mode 100644
index 00000000..5b19bba7
--- /dev/null
+++ b/ch08/averaged.py
@@ -0,0 +1,33 @@
+import numpy as np
+import load_ml100k
+import regression
+import corrneighbours
+from sklearn import metrics
+import norm
+
+def predict(train):
+ predicted0 = regression.predict(train)
+ predicted1 = regression.predict(train.T).T
+ predicted2 = corrneighbours.predict(train)
+ predicted3 = corrneighbours.predict(train.T).T
+ predicted4 = norm.predict(train)
+ predicted5 = norm.predict(train.T).T
+ stack = np.array([
+ predicted0,
+ predicted1,
+ predicted2,
+ predicted3,
+ predicted4,
+ predicted5,
+ ])
+ return stack.mean(0)
+
+
+def main():
+ train,test = load_ml100k.get_train_test(random_state=12)
+ predicted = predict(train)
+ r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+ print('R2 averaged: {:.2%}'.format(r2))
+
+if __name__ == '__main__':
+ main()
diff --git a/ch08/chapter.py b/ch08/chapter.py
new file mode 100644
index 00000000..d039d93f
--- /dev/null
+++ b/ch08/chapter.py
@@ -0,0 +1,208 @@
+import numpy as np # NOT IN BOOK
+from matplotlib import pyplot as plt # NOT IN BOOK
+
+def load():
+ import numpy as np
+ from scipy import sparse
+
+ data = np.loadtxt('data/ml-100k/u.data')
+ ij = data[:, :2]
+ ij -= 1 # original data is in 1-based system
+ values = data[:, 2]
+ reviews = sparse.csc_matrix((values, ij.T)).astype(float)
+ return reviews.toarray()
+reviews = load()
+U,M = np.where(reviews)
+import random
+test_idxs = np.array(random.sample(range(len(U)), len(U)//10))
+
+train = reviews.copy()
+train[U[test_idxs], M[test_idxs]] = 0
+
+test = np.zeros_like(reviews)
+test[U[test_idxs], M[test_idxs]] = reviews[U[test_idxs], M[test_idxs]]
+
+class NormalizePositive(object):
+ def __init__(self, axis=0):
+ self.axis = axis
+
+ def fit(self, features, y=None):
+ if self.axis == 1:
+ features = features.T
+ # count features that are greater than zero in axis 0:
+ binary = (features > 0)
+
+ count0 = binary.sum(axis=0)
+
+ # to avoid division by zero, set zero counts to one:
+ count0[count0 == 0] = 1.
+
+ # computing the mean is easy:
+ self.mean = features.sum(axis=0)/count0
+
+ # only consider differences where binary is True:
+ diff = (features - self.mean) * binary
+ diff **= 2
+ # regularize the estimate of std by adding 0.1
+ self.std = np.sqrt(0.1 + diff.sum(axis=0)/count0)
+ return self
+
+
+ def transform(self, features):
+ if self.axis == 1:
+ features = features.T
+ binary = (features > 0)
+ features = features - self.mean
+ features /= self.std
+ features *= binary
+ if self.axis == 1:
+ features = features.T
+ return features
+
+ def inverse_transform(self, features, copy=True):
+ if copy:
+ features = features.copy()
+ if self.axis == 1:
+ features = features.T
+ features *= self.std
+ features += self.mean
+ if self.axis == 1:
+ features = features.T
+ return features
+
+ def fit_transform(self, features):
+ return self.fit(features).transform(features)
+
+
+norm = NormalizePositive(axis=1)
+binary = (train > 0)
+train = norm.fit_transform(train)
+# plot just 200x200 area for space reasons
+plt.imshow(binary[:200, :200], interpolation='nearest')
+
+from scipy.spatial import distance
+# compute all pair-wise distances:
+dists = distance.pdist(binary, 'correlation')
+# Convert to square form, so that dists[i,j]
+# is distance between binary[i] and binary[j]:
+dists = distance.squareform(dists)
+neighbors = dists.argsort(axis=1)
+
+# We are going to fill this matrix with results
+filled = train.copy()
+for u in range(filled.shape[0]):
+ # n_u is neighbors of user
+ n_u = neighbors[u, 1:]
+ for m in range(filled.shape[1]):
+ # get relevant reviews in order!
+ revs = [train[neigh, m]
+ for neigh in n_u
+                if binary[neigh, m]]
+ if len(revs):
+ # n is the number of reviews for this movie
+ n = len(revs)
+ # take half of the reviews plus one into consideration:
+ n //= 2
+ n += 1
+ revs = revs[:n]
+ filled[u,m] = np.mean(revs)
+
+predicted = norm.inverse_transform(filled)
+from sklearn import metrics
+r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+print('R2 score (binary neighbors): {:.1%}'.format(r2))
+
+reviews = reviews.T
+# use same code as before
+r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+print('R2 score (binary movie neighbors): {:.1%}'.format(r2))
+
+
+from sklearn.linear_model import ElasticNetCV # NOT IN BOOK
+
+reg = ElasticNetCV(alphas=[
+ 0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
+filled = train.copy()
+# iterate over all users:
+for u in range(train.shape[0]):
+ curtrain = np.delete(train, u, axis=0)
+ bu = binary[u]
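+    # NOT IN BOOK: movies the user rated are the training examples; every
+    # other user's ratings of those movies are the features (hence the .T)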
+ reg.fit(curtrain[:,bu].T, train[u, bu])
+ filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
+predicted = norm.inverse_transform(filled)
+r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+print('R2 score (user regression): {:.1%}'.format(r2))
+
+
+# SHOPPING BASKET ANALYSIS
+# This is the slow version of the code, which will take a long time to
+# complete.
+
+
+from collections import defaultdict
+from itertools import chain
+
+# File is downloaded as a compressed file
+import gzip
+# file format is a line per transaction
+# of the form '12 34 342 5...'
+dataset = [[int(tok) for tok in line.strip().split()]
+ for line in gzip.open('data/retail.dat.gz')]
+dataset = [set(d) for d in dataset]
+# count how often each product was purchased:
+counts = defaultdict(int)
+for elem in chain(*dataset):
+ counts[elem] += 1
+
+minsupport = 80
+valid = set(k for k,v in counts.items() if (v >= minsupport))
+itemsets = [frozenset([v]) for v in valid]
+freqsets = []
+for i in range(16):
+ nextsets = []
+ tested = set()
+ for it in itemsets:
+ for v in valid:
+ if v not in it:
+ # Create a new candidate set by adding v to it
+ c = (it | frozenset([v]))
+            # Check if we have tested it already:
+ if c in tested:
+ continue
+ tested.add(c)
+
+ # Count support by looping over dataset
+ # This step is slow.
+ # Check `apriori.py` for a better implementation.
+ support_c = sum(1 for d in dataset if d.issuperset(c))
+ if support_c > minsupport:
+ nextsets.append(c)
+ freqsets.extend(nextsets)
+ itemsets = nextsets
+ if not len(itemsets):
+ break
+print("Finished!")
+
+
+minlift = 5.0
+nr_transactions = float(len(dataset))
+for itemset in freqsets:
+ for item in itemset:
+ consequent = frozenset([item])
+ antecedent = itemset-consequent
+ base = 0.0
+ # acount: antecedent count
+ acount = 0.0
+
+ # ccount : consequent count
+ ccount = 0.0
+ for d in dataset:
+ if item in d: base += 1
+ if d.issuperset(itemset): ccount += 1
+ if d.issuperset(antecedent): acount += 1
+ base /= nr_transactions
+ p_y_given_x = ccount/acount
+ lift = p_y_given_x / base
+ if lift > minlift:
+ print('Rule {0} -> {1} has lift {2}'
+              .format(antecedent, consequent, lift))
diff --git a/ch08/corrneighbours.py b/ch08/corrneighbours.py
index 6f9b2e4e..eb30e685 100644
--- a/ch08/corrneighbours.py
+++ b/ch08/corrneighbours.py
@@ -6,50 +6,53 @@
# It is made available under the MIT License
from __future__ import print_function
-from all_correlations import all_correlations
import numpy as np
-from scipy import sparse
-from load_ml100k import load
-reviews = load()
-
-
-def estimate_user(user, rest):
- bu = user > 0
- br = rest > 0
- ws = all_correlations(bu, br)
- selected = ws.argsort()[-100:]
- estimates = rest[selected].mean(0)
- estimates /= (.1 + br[selected].mean(0))
- return estimates
-
-
-def train_test(user, rest):
- estimates = estimate_user(user, rest)
- bu = user > 0
- br = rest > 0
- err = estimates[bu] - user[bu]
- null = rest.mean(0)
- null /= (.1 + br.mean(0))
- nerr = null[bu] - user[bu]
- return np.dot(err, err), np.dot(nerr, nerr)
-
-
-def cross_validate_all():
- err = []
- for i in xrange(reviews.shape[0]):
- err.append(
- train_test(reviews[i], np.delete(reviews, i, 0))
- )
- revs = (reviews > 0).sum(1)
- err = np.array(err)
- rmse = np.sqrt(err / revs[:, None])
- print(np.mean(rmse, 0))
- print(np.mean(rmse[revs > 60], 0))
-
-
-def all_estimates(reviews):
- reviews = reviews.toarray()
- estimates = np.zeros_like(reviews)
- for i in xrange(reviews.shape[0]):
- estimates[i] = estimate_user(reviews[i], np.delete(reviews, i, 0))
- return estimates
+from load_ml100k import get_train_test
+from scipy.spatial import distance
+from sklearn import metrics
+
+from norm import NormalizePositive
+
+def predict(otrain):
+ binary = (otrain > 0)
+ norm = NormalizePositive(axis=1)
+ train = norm.fit_transform(otrain)
+
+ dists = distance.pdist(binary, 'correlation')
+ dists = distance.squareform(dists)
+
+ neighbors = dists.argsort(axis=1)
+ filled = train.copy()
+ for u in range(filled.shape[0]):
+ # n_u are the neighbors of user
+ n_u = neighbors[u, 1:]
+ for m in range(filled.shape[1]):
+            # This code could be made faster using numpy indexing trickery, at
+            # the cost of readability (left as an exercise for the reader):
+ revs = [train[neigh, m]
+ for neigh in n_u
+ if binary[neigh, m]]
+ if len(revs):
+ n = len(revs)
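+                # average over the closer half of the neighbours (plus one):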
+ n //= 2
+ n += 1
+ revs = revs[:n]
+ filled[u,m] = np.mean(revs)
+
+ return norm.inverse_transform(filled)
+
+def main(transpose_inputs=False):
+ train, test = get_train_test(random_state=12)
+ if transpose_inputs:
+ train = train.T
+ test = test.T
+
+ predicted = predict(train)
+ r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+ print('R2 score (binary {} neighbours): {:.1%}'.format(
+ ('movie' if transpose_inputs else 'user'),
+ r2))
+
+if __name__ == '__main__':
+ main()
+ main(transpose_inputs=True)
diff --git a/ch08/data/.gitignore b/ch08/data/.gitignore
new file mode 100644
index 00000000..c391d625
--- /dev/null
+++ b/ch08/data/.gitignore
@@ -0,0 +1,3 @@
+retail.dat.gz
+ml-100k.zip
+/ml-100k/
diff --git a/ch08/data/download.sh b/ch08/data/download.sh
new file mode 100755
index 00000000..b671171d
--- /dev/null
+++ b/ch08/data/download.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+curl -L -O http://files.grouplens.org/papers/ml-100k.zip
+unzip ml-100k.zip
+curl -L -O http://fimi.ua.ac.be/data/retail.dat.gz
diff --git a/ch08/figure3.py b/ch08/figure3.py
index da699902..daafc300 100644
--- a/ch08/figure3.py
+++ b/ch08/figure3.py
@@ -8,9 +8,8 @@
from load_ml100k import load
from matplotlib import pyplot as plt
data = load()
-data = data.toarray()
plt.gray()
plt.imshow(data[:200, :200], interpolation='nearest')
plt.xlabel('User ID')
plt.ylabel('Film ID')
-plt.savefig('../1400_08_03+.png')
+plt.savefig('Figure_08_03_DataMatrix.png')
diff --git a/ch08/load_ml100k.py b/ch08/load_ml100k.py
index e636a59c..7096e75c 100644
--- a/ch08/load_ml100k.py
+++ b/ch08/load_ml100k.py
@@ -5,15 +5,55 @@
#
# It is made available under the MIT License
-import numpy as np
-from scipy import sparse
+def load():
+ '''Load ML-100k data
+ Returns the review matrix as a numpy array'''
+ import numpy as np
+ from scipy import sparse
+ from os import path
-def load():
- data = np.array([[int(t) for t in line.split('\t')[:3]]
- for line in open('data/ml-100k/u.data')])
+ if not path.exists('data/ml-100k/u.data'):
+ raise IOError("Data has not been downloaded.\nTry the following:\n\n\tcd data\n\t./download.sh")
+
+    # The data is sparse, so a CSC sparse matrix is a natural fit for loading
+    # it; but we convert it to a more traditional (dense) array before
+    # returning
+ data = np.loadtxt('data/ml-100k/u.data')
ij = data[:, :2]
ij -= 1 # original data is in 1-based system
values = data[:, 2]
reviews = sparse.csc_matrix((values, ij.T)).astype(float)
- return reviews
+ return reviews.toarray()
+
+def get_train_test(reviews=None, random_state=None):
+ '''Split data into training & testing
+
+ Parameters
+ ----------
+    reviews : ndarray, optional
+        Input data (loaded with ``load()`` if not given)
+    random_state : int, optional
+        Seed for the random train/test split
+
+ Returns
+ -------
+ train : ndarray
+ training data
+ test : ndarray
+ testing data
+ '''
+ import numpy as np
+ import random
+ r = random.Random(random_state)
+
+ if reviews is None:
+ reviews = load()
+ U,M = np.where(reviews)
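+    # Hold out 10% of the known ratings: they are zeroed out in the training
+    # matrix and are the only non-zero entries in the test matrix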
+ test_idxs = np.array(r.sample(range(len(U)), len(U)//10))
+ train = reviews.copy()
+ train[U[test_idxs], M[test_idxs]] = 0
+
+ test = np.zeros_like(reviews)
+ test[U[test_idxs], M[test_idxs]] = reviews[U[test_idxs], M[test_idxs]]
+
+ return train, test
+
diff --git a/ch08/norm.py b/ch08/norm.py
new file mode 100644
index 00000000..2925bbca
--- /dev/null
+++ b/ch08/norm.py
@@ -0,0 +1,75 @@
+import numpy as np
+
+class NormalizePositive(object):
+
+ def __init__(self, axis=0):
+ self.axis = axis
+
+ def fit(self, features, y=None):
+ # count features that are greater than zero in axis `self.axis`:
+ if self.axis == 1:
+ features = features.T
+ binary = (features > 0)
+ count = binary.sum(axis=0)
+
+ # to avoid division by zero, set zero counts to one:
+ count[count == 0] = 1.
+
+ self.mean = features.sum(axis=0)/count
+
+ # Compute variance by average squared difference to the mean, but only
+ # consider differences where binary is True (i.e., where there was a
+ # true rating):
+ diff = (features - self.mean) * binary
+ diff **= 2
+ # regularize the estimate of std by adding 0.1
+ self.std = np.sqrt(0.1 + diff.sum(axis=0)/count)
+ return self
+
+ def transform(self, features):
+ if self.axis == 1:
+ features = features.T
+ binary = (features > 0)
+ features = features - self.mean
+ features /= self.std
+ features *= binary
+ if self.axis == 1:
+ features = features.T
+ return features
+
+ def inverse_transform(self, features, copy=True):
+ if copy:
+ features = features.copy()
+ if self.axis == 1:
+ features = features.T
+ features *= self.std
+ features += self.mean
+ if self.axis == 1:
+ features = features.T
+ return features
+
+ def fit_transform(self, features):
+ return self.fit(features).transform(features)
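+
+# Example usage (a sketch, not from the book; `ratings` is a hypothetical
+# matrix in which zero means "not rated"):
+#
+#   ratings = np.array([[5., 0., 3.], [0., 4., 4.]])
+#   normed = NormalizePositive(axis=0).fit_transform(ratings)
+#   # missing (zero) entries remain exactly zero after the transform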
+
+
+def predict(train):
+ norm = NormalizePositive()
+ train = norm.fit_transform(train)
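+    # In the normalized space zero is the mean, so inverting an all-zero
+    # matrix yields a baseline that predicts each column's mean rating: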
+ return norm.inverse_transform(train * 0.)
+
+
+def main(transpose_inputs=False):
+ from load_ml100k import get_train_test
+ from sklearn import metrics
+ train,test = get_train_test(random_state=12)
+ if transpose_inputs:
+ train = train.T
+ test = test.T
+ predicted = predict(train)
+ r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+ print('R2 score ({} normalization): {:.1%}'.format(
+ ('movie' if transpose_inputs else 'user'),
+ r2))
+if __name__ == '__main__':
+ main()
+ main(transpose_inputs=True)
diff --git a/ch08/regression.py b/ch08/regression.py
new file mode 100644
index 00000000..693e99a4
--- /dev/null
+++ b/ch08/regression.py
@@ -0,0 +1,50 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import numpy as np
+from sklearn.linear_model import ElasticNetCV
+from norm import NormalizePositive
+from sklearn import metrics
+
+
+def predict(train):
+ binary = (train > 0)
+ reg = ElasticNetCV(fit_intercept=True, alphas=[
+ 0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
+ norm = NormalizePositive()
+ train = norm.fit_transform(train)
+
+ filled = train.copy()
+ # iterate over all users
+ for u in range(train.shape[0]):
+ # remove the current user for training
+ curtrain = np.delete(train, u, axis=0)
+ bu = binary[u]
+ if np.sum(bu) > 5:
+ reg.fit(curtrain[:,bu].T, train[u, bu])
+
+ # Fill the values that were not there already
+ filled[u, ~bu] = reg.predict(curtrain[:,~bu].T)
+ return norm.inverse_transform(filled)
+
+
+def main(transpose_inputs=False):
+ from load_ml100k import get_train_test
+ train,test = get_train_test(random_state=12)
+ if transpose_inputs:
+ train = train.T
+ test = test.T
+ filled = predict(train)
+ r2 = metrics.r2_score(test[test > 0], filled[test > 0])
+
+ print('R2 score ({} regression): {:.1%}'.format(
+ ('movie' if transpose_inputs else 'user'),
+ r2))
+
+if __name__ == '__main__':
+ main()
+ main(transpose_inputs=True)
diff --git a/ch08/similar_movie.py b/ch08/similar_movie.py
index bbb38ee6..cd49a162 100644
--- a/ch08/similar_movie.py
+++ b/ch08/similar_movie.py
@@ -7,11 +7,26 @@
from __future__ import print_function
import numpy as np
-from load_ml100k import load
-from all_correlations import all_correlations
def nn_movie(ureviews, reviews, uid, mid, k=1):
+ '''Movie neighbor based classifier
+
+ Parameters
+ ----------
+ ureviews : ndarray
+ reviews : ndarray
+ uid : int
+ index of user
+ mid : int
+ index of movie
+ k : int
+ index of neighbor to return
+
+ Returns
+ -------
+    pred : float
+        predicted rating of movie `mid` by user `uid`
+ '''
X = ureviews
y = ureviews[mid].copy()
y -= y.mean()
@@ -33,23 +48,27 @@ def nn_movie(ureviews, reviews, uid, mid, k=1):
def all_estimates(reviews, k=1):
+ '''Estimate all review ratings
+ '''
reviews = reviews.astype(float)
k -= 1
nusers, nmovies = reviews.shape
estimates = np.zeros_like(reviews)
for u in range(nusers):
- ureviews = np.delete(reviews, u, 0)
+ ureviews = np.delete(reviews, u, axis=0)
ureviews -= ureviews.mean(0)
- ureviews /= (ureviews.std(0) + 1e-4)
+ ureviews /= (ureviews.std(0) + 1e-5)
ureviews = ureviews.T.copy()
for m in np.where(reviews[u] > 0)[0]:
estimates[u, m] = nn_movie(ureviews, reviews, u, m, k)
return estimates
if __name__ == '__main__':
- reviews = load().toarray()
+ from load_ml100k import load
+ reviews = load()
estimates = all_estimates(reviews)
error = (estimates - reviews)
error **= 2
error = error[reviews > 0]
- print(np.sqrt(error).mean())
+ rmse = np.sqrt(error.mean())
+ print("RMSE is {0}.".format(rmse))
diff --git a/ch08/stacked.py b/ch08/stacked.py
index 1d6e9fc7..8fa6344e 100644
--- a/ch08/stacked.py
+++ b/ch08/stacked.py
@@ -1,43 +1,47 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from __future__ import print_function
-from sklearn.linear_model import LinearRegression
-from load_ml100k import load
import numpy as np
-import similar_movie
-import usermodel
+import load_ml100k
+import regression
import corrneighbours
+from sklearn import linear_model, metrics
+import norm
+
+def predict(train):
+ tr_train,tr_test = load_ml100k.get_train_test(train, random_state=34)
+ tr_predicted0 = regression.predict(tr_train)
+ tr_predicted1 = regression.predict(tr_train.T).T
+ tr_predicted2 = corrneighbours.predict(tr_train)
+ tr_predicted3 = corrneighbours.predict(tr_train.T).T
+ tr_predicted4 = norm.predict(tr_train)
+ tr_predicted5 = norm.predict(tr_train.T).T
+ stack_tr = np.array([
+ tr_predicted0[tr_test > 0],
+ tr_predicted1[tr_test > 0],
+ tr_predicted2[tr_test > 0],
+ tr_predicted3[tr_test > 0],
+ tr_predicted4[tr_test > 0],
+ tr_predicted5[tr_test > 0],
+ ]).T
+
+ lr = linear_model.LinearRegression()
+ lr.fit(stack_tr, tr_test[tr_test > 0])
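+    # The linear regression learns how to weigh the six base predictions,
+    # using only ratings that were held out from the base predictors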
-reviews = load()
-reg = LinearRegression()
-es = np.array([
- usermodel.all_estimates(reviews),
- corrneighbours.all_estimates(reviews),
- similar_movies.all_estimates(reviews),
-])
+ stack_te = np.array([
+ tr_predicted0.ravel(),
+ tr_predicted1.ravel(),
+ tr_predicted2.ravel(),
+ tr_predicted3.ravel(),
+ tr_predicted4.ravel(),
+ tr_predicted5.ravel(),
+ ]).T
-reviews = reviews.toarray()
+ return lr.predict(stack_te).reshape(train.shape)
-total_error = 0.0
-coefficients = []
-for u in xrange(reviews.shape[0]):
- es0 = np.delete(es, u, 1)
- r0 = np.delete(reviews, u, 0)
- X, Y = np.where(r0 > 0)
- X = es[:, X, Y]
- y = r0[r0 > 0]
- reg.fit(X.T, y)
- coefficients.append(reg.coef_)
+def main():
+ train,test = load_ml100k.get_train_test(random_state=12)
+ predicted = predict(train)
+ r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
+ print('R2 stacked: {:.2%}'.format(r2))
- r0 = reviews[u]
- X = np.where(r0 > 0)
- p0 = reg.predict(es[:, u, X].squeeze().T)
- err0 = r0[r0 > 0] - p0
- total_error += np.dot(err0, err0)
- print(u)
+if __name__ == '__main__':
+ main()
diff --git a/ch08/stacked5.py b/ch08/stacked5.py
deleted file mode 100644
index 12f3eacd..00000000
--- a/ch08/stacked5.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from sklearn.linear_model import LinearRegression
-from load_ml100k import load
-import numpy as np
-import similar_movie
-import usermodel
-import corrneighbours
-
-sreviews = load()
-reviews = sreviews.toarray()
-reg = LinearRegression()
-es = np.array([
- usermodel.all_estimates(sreviews),
- similar_movie.all_estimates(reviews, k=1),
- similar_movie.all_estimates(reviews, k=2),
- similar_movie.all_estimates(reviews, k=3),
- similar_movie.all_estimates(reviews, k=4),
- similar_movie.all_estimates(reviews, k=5),
-])
-
-total_error = 0.0
-coefficients = []
-for u in xrange(reviews.shape[0]):
- es0 = np.delete(es, u, 1)
- r0 = np.delete(reviews, u, 0)
- X, Y = np.where(r0 > 0)
- X = es[:, X, Y]
- y = r0[r0 > 0]
- reg.fit(X.T, y)
- coefficients.append(reg.coef_)
-
- r0 = reviews[u]
- X = np.where(r0 > 0)
- p0 = reg.predict(es[:, u, X].squeeze().T)
- err0 = r0[r0 > 0] - p0
- total_error += np.dot(err0, err0)
-coefficients = np.array(coefficients)
diff --git a/ch08/usermodel.py b/ch08/usermodel.py
deleted file mode 100644
index 40a90a2a..00000000
--- a/ch08/usermodel.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import numpy as np
-from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV
-from sklearn.cross_validation import KFold
-from load_ml100k import load
-
-
-def learn_for(reviews, i):
- reg = ElasticNetCV(fit_intercept=True, alphas=[
- 0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
- u = reviews[i]
- us = range(reviews.shape[0])
- del us[i]
- ps, = np.where(u.toarray().ravel() > 0)
- x = reviews[us][:, ps].T
- y = u.data
- kf = KFold(len(y), n_folds=4)
- predictions = np.zeros(len(ps))
- for train, test in kf:
- xc = x[train].copy().toarray()
- x1 = np.array([xi[xi > 0].mean() for xi in xc])
- x1 = np.nan_to_num(x1)
-
- for i in xrange(xc.shape[0]):
- xc[i] -= (xc[i] > 0) * x1[i]
-
- reg.fit(xc, y[train] - x1)
-
- xc = x[test].copy().toarray()
- x1 = np.array([xi[xi > 0].mean() for xi in xc])
- x1 = np.nan_to_num(x1)
-
- for i in xrange(xc.shape[0]):
- xc[i] -= (xc[i] > 0) * x1[i]
-
- p = np.array(map(reg.predict, xc)).ravel()
- predictions[test] = p
- return predictions
-
-
-def all_estimates(reviews):
- whole_data = []
- for i in xrange(reviews.shape[0]):
- s = learn_for(reviews, i)
- whole_data.append(s)
- return np.array(whole_data)
diff --git a/ch09/01_fft_based_classifier.py b/ch09/01_fft_based_classifier.py
index 9f7aab62..84249724 100644
--- a/ch09/01_fft_based_classifier.py
+++ b/ch09/01_fft_based_classifier.py
@@ -18,8 +18,6 @@
from fft import read_fft
-TEST_DIR = "/media/sf_P/pymlbook-data/09-genre-class/private"
-
genre_list = GENRE_LIST
@@ -83,7 +81,7 @@ def train_model(clf_factory, X, Y, name, plot=False):
if plot:
for label in labels:
- print "Plotting", genre_list[label]
+ print("Plotting %s" % genre_list[label])
scores_to_sort = roc_scores[label]
median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
@@ -96,7 +94,7 @@ def train_model(clf_factory, X, Y, name, plot=False):
all_pr_scores = np.asarray(pr_scores.values()).flatten()
summary = (np.mean(scores), np.std(scores),
np.mean(all_pr_scores), np.std(all_pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
@@ -117,7 +115,5 @@ def create_model():
cm_avg = np.mean(cms, axis=0)
cm_norm = cm_avg / np.sum(cm_avg, axis=0)
- print cm_norm
-
plot_confusion_matrix(cm_norm, genre_list, "fft",
"Confusion matrix of an FFT based classifier")
diff --git a/ch09/02_ceps_based_classifier.py b/ch09/02_ceps_based_classifier.py
index e14ea7a6..2791267f 100644
--- a/ch09/02_ceps_based_classifier.py
+++ b/ch09/02_ceps_based_classifier.py
@@ -18,7 +18,6 @@
from ceps import read_ceps
-TEST_DIR = "/media/sf_P/pymlbook-data/09-genre-class/private"
genre_list = GENRE_LIST
@@ -83,7 +82,7 @@ def train_model(clf_factory, X, Y, name, plot=False):
if plot:
for label in labels:
- print "Plotting", genre_list[label]
+ print("Plotting %s" % genre_list[label])
scores_to_sort = roc_scores[label]
median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
@@ -94,7 +93,7 @@ def train_model(clf_factory, X, Y, name, plot=False):
all_pr_scores = np.asarray(pr_scores.values()).flatten()
summary = (np.mean(scores), np.std(scores),
np.mean(all_pr_scores), np.std(all_pr_scores))
- print "%.3f\t%.3f\t%.3f\t%.3f\t" % summary
+ print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)
return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
@@ -115,7 +114,5 @@ def create_model():
cm_avg = np.mean(cms, axis=0)
cm_norm = cm_avg / np.sum(cm_avg, axis=0)
- print cm_norm
-
plot_confusion_matrix(cm_norm, genre_list, "ceps",
"Confusion matrix of a CEPS based classifier")
diff --git a/ch09/Makefile b/ch09/Makefile
index 16b38e20..eafc0f57 100644
--- a/ch09/Makefile
+++ b/ch09/Makefile
@@ -1,17 +1,4 @@
-CHART_DIR = ../charts
-TARGET_DIR = /media/sf_P/Dropbox/pymlbook/pmle/ch09
-
-copy: rocs_fft.png rocs_ceps.png fft_demo
- #cp $(CHART_DIR)/Spectrogram_Genres.png $(TARGET_DIR)/1400_09_01.png
- cp $(CHART_DIR)/confusion_matrix_fft.png $(TARGET_DIR)/1400_09_02.png
- cp $(CHART_DIR)/rocs_fft.png $(TARGET_DIR)/1400_09_03.png
- cp $(CHART_DIR)/rocs_ceps.png $(TARGET_DIR)/1400_09_04.png
- cp fft_demo.png $(TARGET_DIR)/1400_09_05.png
- cp fft_example.png $(TARGET_DIR)/1400_09_06.png
- cp $(CHART_DIR)/confusion_matrix_ceps.png $(TARGET_DIR)/1400_09_07.png
- cp roc_pr.png $(TARGET_DIR)/1400_09_08.png
- cp *.py $(TARGET_DIR)/code
- cp Makefile $(TARGET_DIR)/code
+CHART_DIR = charts
fft:
python 01_fft_based_classifier.py
diff --git a/ch09/ceps.py b/ch09/ceps.py
index 239b0885..30a6f1cf 100644
--- a/ch09/ceps.py
+++ b/ch09/ceps.py
@@ -24,7 +24,7 @@ def write_ceps(ceps, fn):
base_fn, ext = os.path.splitext(fn)
data_fn = base_fn + ".ceps"
np.save(data_fn, ceps)
- print "Written", data_fn
+ print("Written %s"%data_fn)
def create_ceps(fn):
@@ -51,6 +51,6 @@ def read_ceps(genre_list, base_dir=GENRE_DIR):
if __name__ == "__main__":
os.chdir(GENRE_DIR)
glob_wav = os.path.join(sys.argv[1], "*.wav")
- print glob_wav
+ print(glob_wav)
for fn in glob.glob(glob_wav):
create_ceps(fn)
diff --git a/ch09/fft.py b/ch09/fft.py
index 5ff5a96f..0754c3ad 100644
--- a/ch09/fft.py
+++ b/ch09/fft.py
@@ -27,7 +27,7 @@ def write_fft(fft_features, fn):
data_fn = base_fn + ".fft"
np.save(data_fn, fft_features)
- print "Written", data_fn
+ print("Written "%data_fn)
def create_fft(fn):
diff --git a/ch09/utils.py b/ch09/utils.py
index dea0575b..d74f2169 100644
--- a/ch09/utils.py
+++ b/ch09/utils.py
@@ -6,16 +6,32 @@
# It is made available under the MIT License
import os
+import sys
from matplotlib import pylab
import numpy as np
-DATA_DIR = os.path.join("..", "data")
-CHART_DIR = os.path.join("..", "charts")
+DATA_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "data")
-GENRE_DIR = "/media/sf_P/pymlbook-data/09-genre-class/genres"
+CHART_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "charts")
+
+for d in [DATA_DIR, CHART_DIR]:
+ if not os.path.exists(d):
+ os.mkdir(d)
+
+# Put your directory to the different music genres here
+GENRE_DIR = None
GENRE_LIST = ["classical", "jazz", "country", "pop", "rock", "metal"]
+# Put your directory to the test dir here
+TEST_DIR = None
+
+if GENRE_DIR is None or TEST_DIR is None:
+ print("Please set GENRE_DIR and TEST_DIR in utils.py")
+ sys.exit(1)
+
def plot_confusion_matrix(cm, genre_list, name, title):
pylab.clf()
@@ -76,7 +92,7 @@ def show_most_informative_features(vectorizer, clf, n=20):
c_f = sorted(zip(clf.coef_[0], vectorizer.get_feature_names()))
top = zip(c_f[:n], c_f[:-(n + 1):-1])
for (c1, f1), (c2, f2) in top:
- print "\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2)
+ print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2))
def plot_log():
diff --git a/ch10/.gitignore b/ch10/.gitignore
new file mode 100644
index 00000000..2f266195
--- /dev/null
+++ b/ch10/.gitignore
@@ -0,0 +1 @@
+AnimTransDistr/
diff --git a/ch10/README.rst b/ch10/README.rst
new file mode 100644
index 00000000..91e32756
--- /dev/null
+++ b/ch10/README.rst
@@ -0,0 +1,37 @@
+==========
+Chapter 10
+==========
+
+Support code for *Chapter 10: Pattern Recognition & Computer Vision*
+
+Data
+----
+
+This chapter relies on a publicly available dataset (which can be downloaded
+using the ``download.sh`` script inside the ``data/`` directory) as well the
+dataset that is packaged with the repository at ``../SimpleImageDataset/``.
+
+Running ``download.sh`` will retrieve the other dataset into a directory
+``AnimTransDistr/``.
+
+Scripts
+-------
+
+chapter.py
+ Code as written in the book.
+thresholded_figure.py
+ Computes the thresholded figures, including after Gaussian blurring
+lena-ring.py
+ Lena image with center in focus and blurred edges
+figure10.py
+    Pastes two images next to each other
+features.py
+ Contains the color histogram function from the book as well as a simple
+ wrapper around ``mahotas.texture.haralick``
+simple_classification.py
+ Classify SimpleImageDataset with texture features + color histogram features
+large_classification.py
+ Classify ``AnimTransDistr`` with both texture and SURF features.
+neighbors.py
+ Computes image neighbors as well as the neighbor figure from the book.
+
diff --git a/ch10/chapter.py b/ch10/chapter.py
new file mode 100644
index 00000000..233720bb
--- /dev/null
+++ b/ch10/chapter.py
@@ -0,0 +1,186 @@
+import numpy as np
+import mahotas as mh
+image = mh.imread('scene00.jpg')
+from matplotlib import pyplot as plt
+plt.imshow(image)
+plt.show()
+image = mh.colors.rgb2grey(image, dtype=np.uint8)
+plt.imshow(image) # Display the image
+plt.gray()
+thresh = mh.thresholding.otsu(image)
+print('Otsu threshold is {}.'.format(thresh))
+# Otsu threshold is 138.
+plt.imshow(image > thresh)
+
+im16 = mh.gaussian_filter(image,16)
+im = mh.demos.load('lenna')
+
+r,g,b = im.transpose(2,0,1)
+r12 = mh.gaussian_filter(r, 12.)
+g12 = mh.gaussian_filter(g, 12.)
+b12 = mh.gaussian_filter(b, 12.)
+im12 = mh.as_rgb(r12,g12,b12)
+h, w = r.shape # height and width
+Y, X = np.mgrid[:h,:w]
+Y = Y-h/2. # center at h/2
+Y = Y / Y.max() # normalize to -1 .. +1
+
+X = X-w/2.
+X = X / X.max()
+
+C = np.exp(-2.*(X**2+ Y**2))
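+# NOT IN BOOK: C is a Gaussian-like mask, close to 1 at the image center and
+# decaying towards the corners; it blends the sharp image with the blurred one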
+
+# Normalize again to 0..1
+C = C - C.min()
+C = C / C.ptp()
+C = C[:,:,None] # This adds a dummy third dimension to C
+
+ringed = mh.stretch(im*C + (1-C)*im12)
+
+haralick_features = mh.features.haralick(image)
+haralick_features_mean = np.mean(haralick_features, axis=0)
+haralick_features_all = np.ravel(haralick_features)
+
+from glob import glob
+images = glob('../SimpleImageDataset/*.jpg')
+features = []
+labels = []
+for im in images:
+ labels.append(im[:-len('00.jpg')])
+ im = mh.imread(im)
+ im = mh.colors.rgb2gray(im, dtype=np.uint8)
+ features.append(mh.features.haralick(im).ravel())
+
+features = np.array(features)
+labels = np.array(labels)
+
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
+clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', LogisticRegression())])
+
+
+from sklearn import cross_validation
+cv = cross_validation.LeaveOneOut(len(images))
+scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+print('Accuracy: {:.1%}'.format(scores.mean()))
+# Accuracy: 81.1%
+
+def chist(im):
+ im = im // 64
+ r,g,b = im.transpose((2,0,1))
+ pixels = 1 * r + 4 * b + 16 * g
+ hist = np.bincount(pixels.ravel(), minlength=64)
+ hist = hist.astype(float)
+ hist = np.log1p(hist)
+ return hist
+
+features = []
+for im in images:
+ im = mh.imread(im)
+ features.append(chist(im))
+
+
+
+features = []
+for im in images:
+ imcolor = mh.imread(im)
+ im = mh.colors.rgb2gray(imcolor, dtype=np.uint8)
+ features.append(np.concatenate([
+ mh.features.haralick(im).ravel(),
+ chist(imcolor),
+ ]))
+
+
+scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+print('Accuracy: {:.1%}'.format(scores.mean()))
+# Accuracy: 95.6%
+
+
+features = []
+for im in images:
+ imcolor = mh.imread(im)
+ # Ignore everything in the 200 pixels close to the borders
+ imcolor = imcolor[200:-200, 200:-200]
+ im = mh.colors.rgb2gray(imcolor, dtype=np.uint8)
+ features.append(np.concatenate([
+ mh.features.haralick(im).ravel(),
+ chist(imcolor),
+ ]))
+
+sc = StandardScaler()
+features = sc.fit_transform(features)
+from scipy.spatial import distance
+dists = distance.squareform(distance.pdist(features))
+
+
+fig, axes = plt.subplots(2, 9)
+for ci,i in enumerate(range(0,90,10)):
+ left = images[i]
+ dists_left = dists[i]
+ right = dists_left.argsort()
+    # right[0] is the query image itself, so pick the next closest element
+ right = right[1]
+ right = images[right]
+ left = mh.imread(left)
+ right = mh.imread(right)
+ axes[0, ci].imshow(left)
+ axes[1, ci].imshow(right)
+
+
+
+from sklearn.grid_search import GridSearchCV
+C_range = 10.0 ** np.arange(-4, 3)
+grid = GridSearchCV(LogisticRegression(), param_grid={'C' : C_range})
+clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', grid)])
+
+cv = cross_validation.KFold(len(features), 5,
+ shuffle=True, random_state=123)
+scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+print('Accuracy: {:.1%}'.format(scores.mean()))
+
+
+
+
+from mahotas.features import surf
+image = mh.demos.load('lena')
+image = mh.colors.rgb2gray(image, dtype=np.uint8)
+descriptors = surf.surf(image, descriptor_only=True)
+
+from mahotas.features import surf
+descriptors = surf.dense(image, spacing=16)
+alldescriptors = []
+for im in images:
+ im = mh.imread(im, as_grey=True)
+ im = im.astype(np.uint8)
+    alldescriptors.append(surf.dense(im, spacing=16))
+# get all descriptors into a single array
+concatenated = np.concatenate(alldescriptors)
+print('Number of descriptors: {}'.format(
+ len(concatenated)))
+# use only every 64th vector
+concatenated = concatenated[::64]
+from sklearn.cluster import KMeans
+k = 256
+km = KMeans(k)
+km.fit(concatenated)
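+
+# NOT IN BOOK: each descriptor is mapped to its nearest cluster center (its
+# "visual word"); an image is then represented by the histogram of how often
+# each of the k visual words occurs (a bag-of-visual-words representation).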
+
+features = []
+for d in alldescriptors:
+ c = km.predict(d)
+ features.append(
+ np.array([np.sum(c == ci) for ci in range(k)])
+ )
+# build single array and convert to float
+features = np.array(features, dtype=float)
+scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+print('Accuracy: {:.1%}'.format(scores.mean()))
+# Accuracy: 62.6%
+
+
diff --git a/ch10/get-anim-dataset.sh b/ch10/download.sh
similarity index 70%
rename from ch10/get-anim-dataset.sh
rename to ch10/download.sh
index 93121a7f..fb623f3d 100755
--- a/ch10/get-anim-dataset.sh
+++ b/ch10/download.sh
@@ -2,7 +2,7 @@
mkdir -p AnimTransDistr
cd AnimTransDistr
-wget http://vision.stanford.edu/Datasets/AnimTransDistr.rar
+curl -O http://vision.stanford.edu/Datasets/AnimTransDistr.rar
unrar x AnimTransDistr.rar
# The following file is a weird file:
rm Anims/104034.jpg
diff --git a/ch10/edginess.py b/ch10/edginess.py
deleted file mode 100644
index a2041f71..00000000
--- a/ch10/edginess.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import numpy as np
-import mahotas as mh
-
-
-def edginess_sobel(image):
- '''
- edgi = edginess_sobel(image)
-
- Measure the "edginess" of an image
- '''
- edges = mh.sobel(image, just_filter=True)
- edges = edges.ravel()
- return np.sqrt(np.dot(edges, edges))
diff --git a/ch10/features.py b/ch10/features.py
new file mode 100644
index 00000000..42847b30
--- /dev/null
+++ b/ch10/features.py
@@ -0,0 +1,70 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import numpy as np
+import mahotas as mh
+
+
+def edginess_sobel(image):
+ '''Measure the "edginess" of an image
+
+ image should be a 2d numpy array (an image)
+
+ Returns a floating point value which is higher the "edgier" the image is.
+
+ '''
+ edges = mh.sobel(image, just_filter=True)
+ edges = edges.ravel()
+ return np.sqrt(np.dot(edges, edges))
+
+def texture(im):
+ '''Compute features for an image
+
+ Parameters
+ ----------
+ im : ndarray
+
+ Returns
+ -------
+ fs : ndarray
+ 1-D array of features
+ '''
+ im = im.astype(np.uint8)
+ return mh.features.haralick(im).ravel()
+
+
+def chist(im):
+ '''Compute color histogram of input image
+
+ Parameters
+ ----------
+ im : ndarray
+ should be an RGB image
+
+ Returns
+ -------
+ c : ndarray
+ 1-D array of histogram values
+ '''
+
+ # Downsample pixel values:
+ im = im // 64
+
+ # We can also implement the following by using np.histogramdd
+ # im = im.reshape((-1,3))
+ # bins = [np.arange(5), np.arange(5), np.arange(5)]
+ # hist = np.histogramdd(im, bins=bins)[0]
+ # hist = hist.ravel()
+
+ # Separate RGB channels:
+ r,g,b = im.transpose((2,0,1))
+
+ pixels = 1 * r + 4 * g + 16 * b
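+    # After the downsampling each channel has 4 possible values, so this
+    # encodes every pixel as a single integer in the range 0..63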
+ hist = np.bincount(pixels.ravel(), minlength=64)
+ hist = hist.astype(float)
+ return np.log1p(hist)
+
diff --git a/ch10/figure10.py b/ch10/figure10.py
index ef5b938d..6cb45e7a 100644
--- a/ch10/figure10.py
+++ b/ch10/figure10.py
@@ -8,11 +8,13 @@
import numpy as np
import mahotas as mh
-text = mh.imread("simple-dataset/text21.jpg")
-scene = mh.imread("simple-dataset/scene00.jpg")
+# This little script just builds an image with two examples, side-by-side:
+
+text = mh.imread("../SimpleImageDataset/text21.jpg")
+building = mh.imread("../SimpleImageDataset/building00.jpg")
h, w, _ = text.shape
canvas = np.zeros((h, 2 * w + 128, 3), np.uint8)
-canvas[:, -w:] = scene
+canvas[:, -w:] = building
canvas[:, :w] = text
canvas = canvas[::4, ::4]
-mh.imsave('../1400OS_10_10+.jpg', canvas)
+mh.imsave('figure10.jpg', canvas)
diff --git a/ch10/figure13.py b/ch10/figure13.py
deleted file mode 100644
index 8207510f..00000000
--- a/ch10/figure13.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import mahotas as mh
-from mahotas.colors import rgb2grey
-import numpy as np
-
-im = mh.imread('lenna.jpg')
-im = rgb2grey(im)
-
-salt = np.random.random(im.shape) > .975
-pepper = np.random.random(im.shape) > .975
-
-im = np.maximum(salt * 170, mh.stretch(im))
-im = np.minimum(pepper * 30 + im * (~pepper), im)
-
-mh.imsave('../1400OS_10_13+.jpg', im.astype(np.uint8))
diff --git a/ch10/figure18.py b/ch10/figure18.py
deleted file mode 100644
index ee11cf05..00000000
--- a/ch10/figure18.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-import mahotas as mh
-from sklearn import cross_validation
-from sklearn.linear_model.logistic import LogisticRegression
-from mpltools import style
-from matplotlib import pyplot as plt
-import numpy as np
-from glob import glob
-
-basedir = 'AnimTransDistr'
-
-
-def features_for(images):
- fs = []
- for im in images:
- im = mh.imread(im, as_grey=True).astype(np.uint8)
- fs.append(mh.features.haralick(im).mean(0))
- return np.array(fs)
-
-
-def features_labels(groups):
- labels = np.zeros(sum(map(len, groups)))
- st = 0
- for i, g in enumerate(groups):
- labels[st:st + len(g)] = i
- st += len(g)
- return np.vstack(groups), labels
-
-classes = [
- 'Anims',
- 'Cars',
- 'Distras',
- 'Trans',
-]
-
-features = []
-labels = []
-for ci, cl in enumerate(classes):
- images = glob('{}/{}/*.jpg'.format(basedir, cl))
- features.extend(features_for(images))
- labels.extend([ci for _ in images])
-
-features = np.array(features)
-labels = np.array(labels)
-
-scores0 = cross_validation.cross_val_score(
- LogisticRegression(), features, labels, cv=10)
-print('Accuracy (5 fold x-val) with Logistic Regrssion [std features]: %s%%' % (
- 0.1 * round(1000 * scores0.mean())))
-
-tfeatures = features
-
-from sklearn.cluster import KMeans
-from mahotas.features import surf
-
-images = []
-labels = []
-
-for ci, cl in enumerate(classes):
- curimages = glob('{}/{}/*.jpg'.format(basedir, cl))
- images.extend(curimages)
- labels.extend([ci for _ in curimages])
-labels = np.array(labels)
-
-alldescriptors = []
-for im in images:
- im = mh.imread(im, as_grey=1)
- im = im.astype(np.uint8)
-
- #alldescriptors.append(surf.dense(im, spacing=max(im.shape)//32))
- alldescriptors.append(surf.surf(im, descriptor_only=True))
-
-print('Descriptors done')
-k = 256
-km = KMeans(k)
-
-concatenated = np.concatenate(alldescriptors)
-concatenated = concatenated[::64]
-print('k-meaning...')
-km.fit(concatenated)
-features = []
-for d in alldescriptors:
- c = km.predict(d)
- features.append(
- np.array([np.sum(c == i) for i in xrange(k)])
- )
-features = np.array(features)
-print('predicting...')
-scoreSURFlr = cross_validation.cross_val_score(
- LogisticRegression(), features, labels, cv=5).mean()
-print('Accuracy (5 fold x-val) with Log. Reg [SURF features]: %s%%' % (
- 0.1 * round(1000 * scoreSURFlr.mean())))
-
-print('combined...')
-allfeatures = np.hstack([features, tfeatures])
-scoreSURFplr = cross_validation.cross_val_score(
- LogisticRegression(), allfeatures, labels, cv=5).mean()
-
-print('Accuracy (5 fold x-val) with Log. Reg [All features]: %s%%' % (
- 0.1 * round(1000 * scoreSURFplr.mean())))
-
-style.use('ggplot')
-plt.plot([0, 1, 2], 100 *
- np.array([scores0.mean(), scoreSURFlr, scoreSURFplr]), 'k-', lw=8)
-plt.plot(
- [0, 1, 2], 100 * np.array([scores0.mean(), scoreSURFlr, scoreSURFplr]),
- 'o', mec='#cccccc', mew=12, mfc='white')
-plt.xlim(-.5, 2.5)
-plt.ylim(scores0.mean() * 90., scoreSURFplr * 110)
-plt.xticks([0, 1, 2], ["baseline", "SURF", "combined"])
-plt.ylabel('Accuracy (%)')
-plt.savefig('../1400OS_10_18+.png')
diff --git a/ch10/figure5_6.py b/ch10/figure5_6.py
deleted file mode 100644
index 100c84a7..00000000
--- a/ch10/figure5_6.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from matplotlib import pyplot as plt
-import numpy as np
-import mahotas as mh
-image = mh.imread('../1400OS_10_01.jpeg')
-image = mh.colors.rgb2gray(image)
-im8 = mh.gaussian_filter(image, 8)
-im16 = mh.gaussian_filter(image, 16)
-im32 = mh.gaussian_filter(image, 32)
-h, w = im8.shape
-canvas = np.ones((h, 3 * w + 256), np.uint8)
-canvas *= 255
-canvas[:, :w] = im8
-canvas[:, w + 128:2 * w + 128] = im16
-canvas[:, -w:] = im32
-mh.imsave('../1400OS_10_05+.jpg', canvas[:, ::2])
-
-im32 = mh.stretch(im32)
-ot32 = mh.otsu(im32)
-mh.imsave('../1400OS_10_06+.jpg', (im32 > ot32).astype(np.uint8) * 255)
diff --git a/ch10/figure9.py b/ch10/figure9.py
deleted file mode 100644
index 6d3db1a4..00000000
--- a/ch10/figure9.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# This code is supporting material for the book
-# Building Machine Learning Systems with Python
-# by Willi Richert and Luis Pedro Coelho
-# published by PACKT Publishing
-#
-# It is made available under the MIT License
-
-from matplotlib import pyplot as plt
-import numpy as np
-import mahotas as mh
-image = mh.imread('../1400OS_10_01.jpeg')
-image = mh.colors.rgb2gray(image, dtype=np.uint8)
-image = image[::4, ::4]
-thresh = mh.sobel(image)
-filtered = mh.sobel(image, just_filter=True)
-
-thresh = mh.dilate(thresh, np.ones((7, 7)))
-filtered = mh.dilate(mh.stretch(filtered), np.ones((7, 7)))
-
-
-h, w = thresh.shape
-canvas = 255 * np.ones((h, w * 2 + 64), np.uint8)
-canvas[:, :w] = thresh * 255
-canvas[:, -w:] = filtered
-
-mh.imsave('../1400OS_10_09+.jpg', canvas)
diff --git a/ch10/large_classification.py b/ch10/large_classification.py
new file mode 100644
index 00000000..8db3571b
--- /dev/null
+++ b/ch10/large_classification.py
@@ -0,0 +1,108 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+from __future__ import print_function
+import mahotas as mh
+from glob import glob
+from sklearn import cross_validation
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.grid_search import GridSearchCV
+import numpy as np
+
+basedir = 'AnimTransDistr'
+print('This script will test classification of the AnimTransDistr dataset')
+
+C_range = 10.0 ** np.arange(-4, 3)
+grid = GridSearchCV(LogisticRegression(), param_grid={'C': C_range})
+clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', grid)])
+
+def features_for(im):
+ from features import chist
+ im = mh.imread(im)
+ img = mh.colors.rgb2grey(im).astype(np.uint8)
+ return np.concatenate([mh.features.haralick(img).ravel(),
+ chist(im)])
+
+def images():
+ '''Iterate over all (image,label) pairs
+
+    It yields (filename, label) pairs, where label is the index of the image's class in 'classes'.
+ '''
+ for ci, cl in enumerate(classes):
+ images = glob('{}/{}/*.jpg'.format(basedir, cl))
+ for im in sorted(images):
+ yield im, ci
+
+classes = [
+ 'Anims',
+ 'Cars',
+ 'Distras',
+ 'Trans',
+]
+
+print('Computing whole-image texture features...')
+ifeatures = []
+labels = []
+for im, ell in images():
+ ifeatures.append(features_for(im))
+ labels.append(ell)
+
+ifeatures = np.array(ifeatures)
+labels = np.array(labels)
+
+cv = cross_validation.KFold(len(ifeatures), 5, shuffle=True, random_state=123)
+scores0 = cross_validation.cross_val_score(
+ clf, ifeatures, labels, cv=cv)
+print('Accuracy (5 fold x-val) with Logistic Regression [image features]: {:.1%}'.format(
+ scores0.mean()))
+
+
+from sklearn.cluster import KMeans
+from mahotas.features import surf
+
+
+print('Computing SURF descriptors...')
+alldescriptors = []
+for im,_ in images():
+ im = mh.imread(im, as_grey=True)
+ im = im.astype(np.uint8)
+
+ # To use dense sampling, you can try the following line:
+ # alldescriptors.append(surf.dense(im, spacing=16))
+ alldescriptors.append(surf.surf(im, descriptor_only=True))
+
+print('Descriptor computation complete.')
+k = 256
+km = KMeans(k)
+
+concatenated = np.concatenate(alldescriptors)
+print('Number of descriptors: {}'.format(
+ len(concatenated)))
+concatenated = concatenated[::64]
+print('Clustering with K-means...')
+km.fit(concatenated)
+sfeatures = []
+for d in alldescriptors:
+ c = km.predict(d)
+ sfeatures.append(np.bincount(c, minlength=k))
+sfeatures = np.array(sfeatures, dtype=float)
+print('predicting...')
+score_SURF = cross_validation.cross_val_score(
+ clf, sfeatures, labels, cv=cv).mean()
+print('Accuracy (5 fold x-val) with Logistic Regression [SURF features]: {:.1%}'.format(
+    score_SURF))
+
+
+print('Performing classification with all features combined...')
+allfeatures = np.hstack([sfeatures, ifeatures])
+score_SURF_global = cross_validation.cross_val_score(
+ clf, allfeatures, labels, cv=cv).mean()
+print('Accuracy (5 fold x-val) with Logistic Regression [All features]: {:.1%}'.format(
+    score_SURF_global))
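
Note: the switch from the first edition's explicit counting loop (np.sum(c == i)
for each i) to np.bincount above is behavior-preserving. A minimal sketch of the
equivalence, with a made-up assignment array c:

    import numpy as np

    # hypothetical cluster assignments for one image's descriptors
    c = np.array([0, 2, 2, 5])
    k = 8

    hist_loop = np.array([np.sum(c == i) for i in range(k)])  # old style
    hist_fast = np.bincount(c, minlength=k)                   # new style
    assert (hist_loop == hist_fast).all()
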
diff --git a/ch10/lenna-ring.py b/ch10/lena-ring.py
similarity index 65%
rename from ch10/lenna-ring.py
rename to ch10/lena-ring.py
index ae04e33f..fb28b53d 100644
--- a/ch10/lenna-ring.py
+++ b/ch10/lena-ring.py
@@ -7,12 +7,20 @@
import mahotas as mh
import numpy as np
-im = mh.imread('lenna.jpg')
+
+# Read in the image
+im = mh.demos.load('lena')
+
+# This breaks up the image into RGB channels
r, g, b = im.transpose(2, 0, 1)
h, w = r.shape
+
+# smooth the image per channel:
r12 = mh.gaussian_filter(r, 12.)
g12 = mh.gaussian_filter(g, 12.)
b12 = mh.gaussian_filter(b, 12.)
+
+# build back the RGB image
im12 = mh.as_rgb(r12, g12, b12)
X, Y = np.mgrid[:h, :w]
@@ -20,10 +28,14 @@
Y = Y - w / 2.
X /= X.max()
Y /= Y.max()
+
+# Array C will have the highest values in the center, fading out to the edges:
+
C = np.exp(-2. * (X ** 2 + Y ** 2))
C -= C.min()
C /= C.ptp()
C = C[:, :, None]
+# The final result is sharp in the center and smooths out toward the borders:
ring = mh.stretch(im * C + (1 - C) * im12)
-mh.imsave('lenna-ring.jpg', ring)
+mh.imsave('lena-ring.jpg', ring)
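
Note: the mask C built above is what creates the ring effect. A standalone
sketch of the same construction for a hypothetical 512x512 image; after
rescaling, C runs from 1 at the center to 0 at the farthest corner, and
im * C + (1 - C) * im12 is then a per-pixel linear interpolation between
the sharp and the blurred image:

    import numpy as np

    h, w = 512, 512                      # hypothetical image size
    X, Y = np.mgrid[:h, :w]
    X = (X - h / 2.) / (h / 2.)          # coordinates scaled to roughly [-1, 1]
    Y = (Y - w / 2.) / (w / 2.)
    C = np.exp(-2. * (X ** 2 + Y ** 2))
    C = (C - C.min()) / C.ptp()          # mask now spans exactly [0, 1]
    C = C[:, :, None]                    # broadcast over the RGB channels
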
diff --git a/ch10/neighbors.py b/ch10/neighbors.py
new file mode 100644
index 00000000..1f71d0de
--- /dev/null
+++ b/ch10/neighbors.py
@@ -0,0 +1,64 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+
+import numpy as np
+import mahotas as mh
+from glob import glob
+from features import texture, chist
+from matplotlib import pyplot as plt
+from sklearn.preprocessing import StandardScaler
+from scipy.spatial import distance
+
+basedir = '../SimpleImageDataset/'
+
+
+haralicks = []
+chists = []
+
+print('Computing features...')
+# Use glob to get all the images
+images = glob('{}/*.jpg'.format(basedir))
+# We sort the images to ensure that they are always processed in the same order
+# Otherwise, this would introduce some variation just based on the random
+# ordering that the filesystem uses
+images.sort()
+
+for fname in images:
+ imc = mh.imread(fname)
+ imc = imc[200:-200,200:-200]
+ haralicks.append(texture(mh.colors.rgb2grey(imc)))
+ chists.append(chist(imc))
+
+haralicks = np.array(haralicks)
+chists = np.array(chists)
+features = np.hstack([chists, haralicks])
+
+print('Computing neighbors...')
+sc = StandardScaler()
+features = sc.fit_transform(features)
+dists = distance.squareform(distance.pdist(features))
+
+print('Plotting...')
+fig, axes = plt.subplots(2, 9, figsize=(16,8))
+
+# Remove ticks from all subplots
+for ax in axes.flat:
+ ax.set_xticks([])
+ ax.set_yticks([])
+
+for ci,i in enumerate(range(0,90,10)):
+ left = images[i]
+ dists_left = dists[i]
+ right = dists_left.argsort()
+    # right[0] is the index of the query image itself (distance zero), so pick the next closest element
+ right = right[1]
+ right = images[right]
+ left = mh.imread(left)
+ right = mh.imread(right)
+ axes[0, ci].imshow(left)
+ axes[1, ci].imshow(right)
+
+fig.tight_layout()
+fig.savefig('figure_neighbors.png', dpi=300)
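
Note: distance.squareform(distance.pdist(features)) above replaces what would
otherwise be a double loop over images. A minimal sketch of what it computes,
on a made-up feature matrix:

    import numpy as np
    from scipy.spatial import distance

    X = np.random.random((5, 3))              # 5 items, 3 features (made up)
    D = distance.squareform(distance.pdist(X))

    # D is a symmetric 5x5 matrix of Euclidean distances with a zero diagonal,
    # so D[i].argsort()[1] is the nearest neighbor of item i (index 0 is i).
    i, j = 1, 3
    assert np.allclose(D[i, j], np.sqrt(((X[i] - X[j]) ** 2).sum()))
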
diff --git a/ch10/scene00.jpg b/ch10/scene00.jpg
new file mode 100644
index 00000000..ed727a50
Binary files /dev/null and b/ch10/scene00.jpg differ
diff --git a/ch10/simple_classification.py b/ch10/simple_classification.py
index 2f9ebfa4..a5a448d2 100644
--- a/ch10/simple_classification.py
+++ b/ch10/simple_classification.py
@@ -6,37 +6,65 @@
# It is made available under the MIT License
import mahotas as mh
-from sklearn import cross_validation
-from sklearn.linear_model.logistic import LogisticRegression
import numpy as np
from glob import glob
-from edginess import edginess_sobel
-basedir = 'simple-dataset'
+from features import texture, chist
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+basedir = '../SimpleImageDataset/'
-def features_for(im):
- im = mh.imread(im, as_grey=True).astype(np.uint8)
- return mh.features.haralick(im).mean(0)
-features = []
-sobels = []
+haralicks = []
labels = []
+chists = []
+
+print('This script will test (with cross-validation) classification of the simple 3-class dataset')
+print('Computing features...')
+# Use glob to get all the images
images = glob('{}/*.jpg'.format(basedir))
-for im in images:
- features.append(features_for(im))
- sobels.append(edginess_sobel(mh.imread(im, as_grey=True)))
- labels.append(im[:-len('00.jpg')])
-features = np.array(features)
+# We sort the images to ensure that they are always processed in the same order
+# Otherwise, this would introduce some variation just based on the random
+# ordering that the filesystem uses
+for fname in sorted(images):
+ imc = mh.imread(fname)
+ haralicks.append(texture(mh.colors.rgb2grey(imc)))
+ chists.append(chist(imc))
+
+ # Files are named like building00.jpg, scene23.jpg...
+ labels.append(fname[:-len('xx.jpg')])
+
+print('Finished computing features.')
+
+haralicks = np.array(haralicks)
labels = np.array(labels)
+chists = np.array(chists)
+
+haralick_plus_chists = np.hstack([chists, haralicks])
+
+# We use Logistic Regression because it achieves high accuracy on small(ish) datasets
+# Feel free to experiment with other classifiers
+clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', LogisticRegression())])
+
+from sklearn import cross_validation
+cv = cross_validation.LeaveOneOut(len(images))
scores = cross_validation.cross_val_score(
- LogisticRegression(), features, labels, cv=5)
-print('Accuracy (5 fold x-val) with Logistic Regrssion [std features]: {}%'.format(
- 0.1 * round(1000 * scores.mean())))
+ clf, haralicks, labels, cv=cv)
+print('Accuracy (Leave-one-out) with Logistic Regression [haralick features]: {:.1%}'.format(
+ scores.mean()))
scores = cross_validation.cross_val_score(
- LogisticRegression(), np.hstack([np.atleast_2d(sobels).T, features]), labels, cv=5).mean()
-print('Accuracy (5 fold x-val) with Logistic Regrssion [std features + sobel]: {}%'.format(
- 0.1 * round(1000 * scores.mean())))
+ clf, chists, labels, cv=cv)
+print('Accuracy (Leave-one-out) with Logistic Regression [color histograms]: {:.1%}'.format(
+ scores.mean()))
+
+scores = cross_validation.cross_val_score(
+ clf, haralick_plus_chists, labels, cv=cv)
+print('Accuracy (Leave-one-out) with Logistic Regression [texture features + color histograms]: {:.1%}'.format(
+ scores.mean()))
+
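
Note: sklearn.cross_validation, used throughout these scripts, was deprecated
in scikit-learn 0.18 and removed in 0.20. With a recent scikit-learn, the
leave-one-out evaluation above would look roughly as follows (a sketch reusing
clf, haralicks and labels from the script; LeaveOneOut no longer takes the
sample count):

    from sklearn.model_selection import LeaveOneOut, cross_val_score

    cv = LeaveOneOut()   # no argument in the model_selection API
    scores = cross_val_score(clf, haralicks, labels, cv=cv)
    print('Accuracy (Leave-one-out): {:.1%}'.format(scores.mean()))
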
diff --git a/ch10/threshold.py b/ch10/threshold.py
index 51f32861..d7361bfb 100644
--- a/ch10/threshold.py
+++ b/ch10/threshold.py
@@ -7,15 +7,28 @@
import numpy as np
import mahotas as mh
-image = mh.imread('../1400OS_10_01.jpeg')
+
+# Load our example image:
+image = mh.imread('../SimpleImageDataset/building05.jpg')
+
+# Convert to greyscale
image = mh.colors.rgb2gray(image, dtype=np.uint8)
+
+# Compute a threshold value:
thresh = mh.thresholding.otsu(image)
-print(thresh)
+print('Otsu threshold is {0}'.format(thresh))
+
+# Compute the thresholded image
otsubin = (image > thresh)
+print('Saving thresholded image (with Otsu threshold) to otsu-threshold.jpeg')
mh.imsave('otsu-threshold.jpeg', otsubin.astype(np.uint8) * 255)
-otsubin = ~ mh.close(~otsubin, np.ones((15, 15)))
+
+# Execute morphological opening to smooth out the edges
+otsubin = mh.open(otsubin, np.ones((15, 15)))
mh.imsave('otsu-closed.jpeg', otsubin.astype(np.uint8) * 255)
+# An alternative thresholding method:
thresh = mh.thresholding.rc(image)
-print(thresh)
+print('Riddler-Calvard threshold is {0}'.format(thresh))
+print('Saving thresholded image (with Riddler-Calvard threshold) to rc-threshold.jpeg')
mh.imsave('rc-threshold.jpeg', (image > thresh).astype(np.uint8) * 255)
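
Note: replacing ~mh.close(~otsubin, ...) with mh.open(otsubin, ...) is an
identity of binary morphology (closing the complement equals opening, for a
symmetric structuring element), and opening itself is erosion followed by
dilation. A self-contained sketch, assuming mahotas' open/erode/dilate
semantics:

    import numpy as np
    import mahotas as mh

    im = np.zeros((64, 64), bool)
    im[20:40, 20:40] = True          # a solid square survives opening
    im[25, 5] = True                 # an isolated speck does not

    Bc = np.ones((15, 15), bool)
    opened = mh.open(im, Bc)
    assert (opened == mh.dilate(mh.erode(im, Bc), Bc)).all()
    assert not opened[25, 5]
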
diff --git a/ch10/thresholded_figure.py b/ch10/thresholded_figure.py
new file mode 100644
index 00000000..947762e8
--- /dev/null
+++ b/ch10/thresholded_figure.py
@@ -0,0 +1,31 @@
+import mahotas as mh
+import numpy as np
+from matplotlib import pyplot as plt
+
+# Load image & convert to B&W
+image = mh.imread('../SimpleImageDataset/scene00.jpg')
+image = mh.colors.rgb2grey(image, dtype=np.uint8)
+plt.imshow(image)
+plt.gray()
+plt.title('original image')
+
+thresh = mh.thresholding.otsu(image)
+print('Otsu threshold is {}.'.format(thresh))
+
+threshed = (image > thresh)
+plt.figure()
+plt.imshow(threshed)
+plt.title('thresholded image')
+mh.imsave('thresholded.png', threshed.astype(np.uint8)*255)
+
+im16 = mh.gaussian_filter(image, 16)
+
+# Repeat the thresholding operations with the blurred image
+thresh = mh.thresholding.otsu(im16.astype(np.uint8))
+threshed = (im16 > thresh)
+plt.figure()
+plt.imshow(threshed)
+plt.title('thresholded image (after blurring)')
+print('Otsu threshold after blurring is {}.'.format(thresh))
+mh.imsave('thresholded16.png', threshed.astype(np.uint8)*255)
+plt.show()
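
Note: mh.gaussian_filter returns a float image, while mh.thresholding.otsu
expects unsigned integers, hence the astype(np.uint8) above. An alternative
worth knowing is mh.stretch, which rescales to the full 0..255 range instead
of truncating (a sketch on the same input):

    import numpy as np
    import mahotas as mh

    image = mh.imread('../SimpleImageDataset/scene00.jpg')
    image = mh.colors.rgb2grey(image, dtype=np.uint8)

    im16 = mh.stretch(mh.gaussian_filter(image, 16))  # float back to uint8
    thresh = mh.thresholding.otsu(im16)
    threshed = (im16 > thresh)
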
diff --git a/ch11/Makefile b/ch11/Makefile
deleted file mode 100644
index e55d478d..00000000
--- a/ch11/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-CHART_DIR = ../charts
-TARGET_DIR = /media/sf_P/Dropbox/pymlbook/pmle/ch11
-
-copy:
- cp $(CHART_DIR)/corr_demo_1.png $(TARGET_DIR)/1400_11_01.png
- cp $(CHART_DIR)/corr_demo_2.png $(TARGET_DIR)/1400_11_02.png
-
- cp $(CHART_DIR)/entropy_demo.png $(TARGET_DIR)/1400_11_03.png
- cp $(CHART_DIR)/mi_demo_1.png $(TARGET_DIR)/1400_11_04.png
- cp $(CHART_DIR)/mi_demo_2.png $(TARGET_DIR)/1400_11_05.png
-
- cp $(CHART_DIR)/pca_demo_1.png $(TARGET_DIR)/1400_11_08.png
- cp $(CHART_DIR)/pca_demo_2.png $(TARGET_DIR)/1400_11_09.png
- cp $(CHART_DIR)/lda_demo.png $(TARGET_DIR)/1400_11_10.png
-
- cp $(CHART_DIR)/mds_demo_1.png $(TARGET_DIR)/1400_11_13.png
- cp $(CHART_DIR)/mds_demo_iris.png $(TARGET_DIR)/1400_11_11.png
- cp $(CHART_DIR)/pca_demo_iris.png $(TARGET_DIR)/1400_11_12.png
-
- cp *.py $(TARGET_DIR)/code
- cp Makefile $(TARGET_DIR)/code
-
diff --git a/ch11/demo_corr.py b/ch11/demo_corr.py
index b6951802..07af7dba 100644
--- a/ch11/demo_corr.py
+++ b/ch11/demo_corr.py
@@ -12,8 +12,7 @@
import scipy
from scipy.stats import norm, pearsonr
-DATA_DIR = os.path.join("..", "data")
-CHART_DIR = os.path.join("..", "charts")
+from utils import CHART_DIR
def _plot_correlation_func(x, y):
@@ -39,19 +38,19 @@ def plot_correlation_demo():
x = np.arange(0, 10, 0.2)
pylab.subplot(221)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=.01, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=.01, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(222)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=.1, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=.1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(223)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=1, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(224)
- y = norm.rvs(1, loc=0, scale=10, size=len(x))
+ y = norm.rvs(1, scale=10, size=len(x))
_plot_correlation_func(x, y)
pylab.autoscale(tight=True)
@@ -66,19 +65,19 @@ def plot_correlation_demo():
x = np.arange(-5, 5, 0.2)
pylab.subplot(221)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=.01, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=.01, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(222)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=.1, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=.1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(223)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=1, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=1, size=len(x))
_plot_correlation_func(x, y)
pylab.subplot(224)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=10, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=10, size=len(x))
_plot_correlation_func(x, y)
pylab.autoscale(tight=True)
diff --git a/ch11/demo_mds.py b/ch11/demo_mds.py
index 581d4359..8f956cd3 100644
--- a/ch11/demo_mds.py
+++ b/ch11/demo_mds.py
@@ -14,8 +14,7 @@
from sklearn import linear_model, manifold, decomposition, datasets
logistic = linear_model.LogisticRegression()
-
-CHART_DIR = os.path.join("..", "charts")
+from utils import CHART_DIR
np.random.seed(3)
diff --git a/ch11/demo_mi.py b/ch11/demo_mi.py
index e38b5fa2..9fb9f8fc 100644
--- a/ch11/demo_mi.py
+++ b/ch11/demo_mi.py
@@ -11,8 +11,7 @@
import numpy as np
from scipy.stats import norm, entropy
-DATA_DIR = os.path.join("..", "data")
-CHART_DIR = os.path.join("..", "charts")
+from utils import CHART_DIR
def mutual_info(x, y, bins=10):
@@ -73,19 +72,19 @@ def plot_mi_demo():
x = np.arange(0, 10, 0.2)
pylab.subplot(221)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=.01, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=.01, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(222)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=.1, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=.1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(223)
- y = 0.5 * x + norm.rvs(1, loc=0, scale=1, size=len(x))
+ y = 0.5 * x + norm.rvs(1, scale=1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(224)
- y = norm.rvs(1, loc=0, scale=10, size=len(x))
+ y = norm.rvs(1, scale=10, size=len(x))
_plot_mi_func(x, y)
pylab.autoscale(tight=True)
@@ -100,19 +99,19 @@ def plot_mi_demo():
x = np.arange(-5, 5, 0.2)
pylab.subplot(221)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=.01, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=.01, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(222)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=.1, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=.1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(223)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=1, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=1, size=len(x))
_plot_mi_func(x, y)
pylab.subplot(224)
- y = 0.5 * x ** 2 + norm.rvs(1, loc=0, scale=10, size=len(x))
+ y = 0.5 * x ** 2 + norm.rvs(1, scale=10, size=len(x))
_plot_mi_func(x, y)
pylab.autoscale(tight=True)
diff --git a/ch11/demo_pca.py b/ch11/demo_pca.py
index 650a2c10..4cfa0945 100644
--- a/ch11/demo_pca.py
+++ b/ch11/demo_pca.py
@@ -16,12 +16,12 @@
logistic = linear_model.LogisticRegression()
-CHART_DIR = os.path.join("..", "charts")
+from utils import CHART_DIR
np.random.seed(3)
x1 = np.arange(0, 10, .2)
-x2 = x1 + np.random.normal(loc=0, scale=1, size=len(x1))
+x2 = x1 + np.random.normal(scale=1, size=len(x1))
def plot_simple_demo_1():
@@ -35,7 +35,7 @@ def plot_simple_demo_1():
pylab.ylabel("$X_2$")
x1 = np.arange(0, 10, .2)
- x2 = x1 + np.random.normal(loc=0, scale=1, size=len(x1))
+ x2 = x1 + np.random.normal(scale=1, size=len(x1))
good = (x1 > 5) | (x2 > 5)
bad = ~good
@@ -89,7 +89,7 @@ def plot_simple_demo_2():
pylab.ylabel("$X_2$")
x1 = np.arange(0, 10, .2)
- x2 = x1 + np.random.normal(loc=0, scale=1, size=len(x1))
+ x2 = x1 + np.random.normal(scale=1, size=len(x1))
good = x1 > x2
bad = ~good
diff --git a/ch11/utils.py b/ch11/utils.py
new file mode 100644
index 00000000..7b2ec21b
--- /dev/null
+++ b/ch11/utils.py
@@ -0,0 +1,19 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import os
+
+DATA_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "data")
+
+CHART_DIR = os.path.join(
+ os.path.dirname(os.path.realpath(__file__)), "charts")
+
+for d in [DATA_DIR, CHART_DIR]:
+ if not os.path.exists(d):
+ os.mkdir(d)
+
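
Note: with this module in place, the chapter's demo scripts only need
from utils import CHART_DIR, as the hunks above show. A sketch of the intended
usage (the chart name is made up):

    import os
    from utils import CHART_DIR
    from matplotlib import pylab

    pylab.plot([0, 1], [1, 0])
    pylab.savefig(os.path.join(CHART_DIR, "example_chart.png"))
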
diff --git a/ch12/.gitignore b/ch12/.gitignore
new file mode 100644
index 00000000..fa8fbcc2
--- /dev/null
+++ b/ch12/.gitignore
@@ -0,0 +1,3 @@
+*.jugdata/
+output.txt
+results.image.txt
diff --git a/ch12/README.rst b/ch12/README.rst
new file mode 100644
index 00000000..fba88d4a
--- /dev/null
+++ b/ch12/README.rst
@@ -0,0 +1,28 @@
+==========
+Chapter 12
+==========
+
+Support code for *Chapter 12: Big(ger) Data*
+
+Data
+----
+
+This chapter relies only on the image dataset that is packaged with the
+repository at ``../SimpleImageDataset/``.
+
+Scripts
+-------
+
+chapter.py
+ Code as written in the book
+jugfile.py
+ Example jugfile
+image-classification.py
+ Jugfile implementation of image classification from Chapter 10
+
+setup-aws.txt
+    Commands to set up an Amazon Web Services machine
+run-jugfile.sh
+    Wrapper script to run jug on jugfile.py
+run-image-classification.sh
+    Wrapper script to run jug on image-classification.py
diff --git a/ch12/chapter.py b/ch12/chapter.py
new file mode 100644
index 00000000..15326680
--- /dev/null
+++ b/ch12/chapter.py
@@ -0,0 +1,95 @@
+from jug import TaskGenerator
+from glob import glob
+import mahotas as mh
+@TaskGenerator
+def compute_texture(im):
+ from features import texture
+ imc = mh.imread(im)
+ return texture(mh.colors.rgb2gray(imc))
+
+@TaskGenerator
+def chist_file(fname):
+ from features import chist
+ im = mh.imread(fname)
+ return chist(im)
+
+import numpy as np
+to_array = TaskGenerator(np.array)
+hstack = TaskGenerator(np.hstack)
+
+haralicks = []
+chists = []
+labels = []
+
+# Change this variable to point to
+# the location of the dataset on disk
+basedir = '../SimpleImageDataset/'
+# Use glob to get all the images
+images = glob('{}/*.jpg'.format(basedir))
+
+for fname in sorted(images):
+ haralicks.append(compute_texture(fname))
+ chists.append(chist_file(fname))
+ # The class is encoded in the filename as xxxx00.jpg
+ labels.append(fname[:-len('00.jpg')])
+
+haralicks = to_array(haralicks)
+chists = to_array(chists)
+labels = to_array(labels)
+
+@TaskGenerator
+def accuracy(features, labels):
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.pipeline import Pipeline
+ from sklearn.preprocessing import StandardScaler
+ from sklearn import cross_validation
+
+ clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', LogisticRegression())])
+ cv = cross_validation.LeaveOneOut(len(features))
+ scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+ return scores.mean()
+scores_base = accuracy(haralicks, labels)
+scores_chist = accuracy(chists, labels)
+
+combined = hstack([chists, haralicks])
+scores_combined = accuracy(combined, labels)
+
+@TaskGenerator
+def print_results(scores):
+ with open('results.image.txt', 'w') as output:
+ for k,v in scores:
+ output.write('Accuracy [{}]: {:.1%}\n'.format(
+ k, v.mean()))
+
+print_results([
+ ('base', scores_base),
+ ('chists', scores_chist),
+ ('combined' , scores_combined),
+ ])
+
+@TaskGenerator
+def compute_lbp(fname):
+ from mahotas.features import lbp
+ imc = mh.imread(fname)
+ im = mh.colors.rgb2grey(imc)
+ return lbp(im, radius=8, points=6)
+
+lbps = []
+for fname in sorted(images):
+ # the rest of the loop as before
+ lbps.append(compute_lbp(fname))
+lbps = to_array(lbps)
+
+scores_lbps = accuracy(lbps, labels)
+combined_all = hstack([chists, haralicks, lbps])
+scores_combined_all = accuracy(combined_all, labels)
+
+print_results([
+ ('base', scores_base),
+ ('chists', scores_chist),
+ ('lbps', scores_lbps),
+ ('combined' , scores_combined),
+ ('combined_all' , scores_combined_all),
+ ])
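
Note: importing chapter.py runs no computation; every call to a
@TaskGenerator-wrapped function only records a task, and jug execute chapter.py
later runs the graph (several concurrent jug execute processes will share the
tasks). A minimal sketch of that semantics:

    from jug import TaskGenerator

    @TaskGenerator
    def double(x):
        return 2 * x

    t = double(2)
    # t is a jug Task object, not the value 4; the actual computation happens
    # when `jug execute` picks the task up, and the result is cached on disk.
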
diff --git a/ch12/features.py b/ch12/features.py
new file mode 120000
index 00000000..142a324d
--- /dev/null
+++ b/ch12/features.py
@@ -0,0 +1 @@
+../ch10/features.py
\ No newline at end of file
diff --git a/ch12/image-classification.py b/ch12/image-classification.py
new file mode 100644
index 00000000..6f76d26d
--- /dev/null
+++ b/ch12/image-classification.py
@@ -0,0 +1,116 @@
+# This code is supporting material for the book
+# Building Machine Learning Systems with Python
+# by Willi Richert and Luis Pedro Coelho
+# published by PACKT Publishing
+#
+# It is made available under the MIT License
+
+import mahotas as mh
+import numpy as np
+from glob import glob
+from jug import TaskGenerator
+
+# We need to use the `features` module from chapter 10.
+from sys import path
+path.append('../ch10')
+
+
+# This is the jug-enabled version of the image classification code from Chapter 10 (see ``simple_classification.py`` there)
+
+basedir = '../SimpleImageDataset/'
+
+@TaskGenerator
+def compute_texture(im):
+ '''Compute features for an image
+
+ Parameters
+ ----------
+ im : str
+ filepath for image to process
+
+ Returns
+ -------
+ fs : ndarray
+ 1-D array of features
+ '''
+ from features import texture
+ imc = mh.imread(im)
+ return texture(mh.colors.rgb2grey(imc))
+
+@TaskGenerator
+def chist(fname):
+ from features import chist as color_histogram
+ im = mh.imread(fname)
+ return color_histogram(im)
+
+@TaskGenerator
+def compute_lbp(fname):
+ from mahotas.features import lbp
+ imc = mh.imread(fname)
+ im = mh.colors.rgb2grey(imc)
+ return lbp(im, radius=8, points=6)
+
+
+@TaskGenerator
+def accuracy(features, labels):
+ from sklearn.linear_model import LogisticRegression
+ from sklearn.pipeline import Pipeline
+ from sklearn.preprocessing import StandardScaler
+ from sklearn import cross_validation
+ # We use logistic regression because it is very fast.
+ # Feel free to experiment with other classifiers
+ clf = Pipeline([('preproc', StandardScaler()),
+ ('classifier', LogisticRegression())])
+ cv = cross_validation.LeaveOneOut(len(features))
+ scores = cross_validation.cross_val_score(
+ clf, features, labels, cv=cv)
+ return scores.mean()
+
+
+@TaskGenerator
+def print_results(scores):
+ with open('results.image.txt', 'w') as output:
+ for k,v in scores:
+ output.write('Accuracy (LOO x-val) with Logistic Regression [{0}]: {1:.1%}\n'.format(
+ k, v.mean()))
+
+
+to_array = TaskGenerator(np.array)
+hstack = TaskGenerator(np.hstack)
+
+haralicks = []
+chists = []
+lbps = []
+labels = []
+
+# Use glob to get all the images
+images = glob('{0}/*.jpg'.format(basedir))
+for fname in sorted(images):
+ haralicks.append(compute_texture(fname))
+ chists.append(chist(fname))
+ lbps.append(compute_lbp(fname))
+ labels.append(fname[:-len('00.jpg')]) # The class is encoded in the filename as xxxx00.jpg
+
+haralicks = to_array(haralicks)
+chists = to_array(chists)
+lbps = to_array(lbps)
+labels = to_array(labels)
+
+scores_base = accuracy(haralicks, labels)
+scores_chist = accuracy(chists, labels)
+scores_lbps = accuracy(lbps, labels)
+
+combined = hstack([chists, haralicks])
+scores_combined = accuracy(combined, labels)
+
+combined_all = hstack([chists, haralicks, lbps])
+scores_combined_all = accuracy(combined_all, labels)
+
+print_results([
+ ('base', scores_base),
+ ('chists', scores_chist),
+ ('lbps', scores_lbps),
+ ('combined' , scores_combined),
+ ('combined_all' , scores_combined_all),
+ ])
+
diff --git a/ch12/jugfile.py b/ch12/jugfile.py
index 91306ccb..9d7e2b7a 100644
--- a/ch12/jugfile.py
+++ b/ch12/jugfile.py
@@ -23,7 +23,7 @@ def add(a, b):
@TaskGenerator
def print_final_result(oname, value):
with open(oname, 'w') as output:
- print >>output, "Final result:", value
+ output.write("Final result: {0}\n".format(value))
input = 2
y = double(input)
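
Note: the removed print >>output line is Python-2-only syntax. The output.write
fix used above works everywhere; an equivalent alternative (a sketch) keeps it
a print call on both Python 2 and 3:

    from __future__ import print_function

    with open('output.txt', 'w') as output:
        print("Final result:", 4, file=output)
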
diff --git a/ch12/run-image-classification.sh b/ch12/run-image-classification.sh
new file mode 100755
index 00000000..868d07fa
--- /dev/null
+++ b/ch12/run-image-classification.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+jug execute image-classification.py
diff --git a/ch12/run-jugfile.sh b/ch12/run-jugfile.sh
new file mode 100755
index 00000000..0ff59131
--- /dev/null
+++ b/ch12/run-jugfile.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+jug execute
+
diff --git a/ch12/setup-aws.txt b/ch12/setup-aws.txt
new file mode 100644
index 00000000..292654eb
--- /dev/null
+++ b/ch12/setup-aws.txt
@@ -0,0 +1,7 @@
+sudo yum update
+sudo yum -y install python-devel python-pip numpy scipy python-matplotlib
+sudo yum -y install gcc-c++
+sudo yum -y install git
+sudo pip-python install -U pip
+sudo pip install scikit-learn jug mahotas
+