|
8 | 8 | # This script demonstrates the difference between the training accuracy and |
9 | 9 | # testing (held-out) accuracy. |
10 | 10 |
|
11 | | -from matplotlib import pyplot as plt |
12 | 11 | import numpy as np |
13 | 12 | from sklearn.datasets import load_iris |
14 | 13 | from threshold import fit_model, accuracy |
|
# Map each sample's integer target to its species name
# ('setosa' / 'versicolor' / 'virginica').
labels = data['target_names'][data['target']]

# Setosa is linearly separable from the rest and therefore trivial,
# so we keep only the other two species:
is_setosa = (labels == 'setosa')
keep = ~is_setosa
features = features[keep]
labels = labels[keep]

# The binary task is now virginica vs. non-virginica:
is_virginica = (labels == 'virginica')
27 | 26 |
|
# Split the data in two halves: testing and training.
# Even positions become test samples, odd positions train samples,
# i.e. testing = [True, False, True, False, ...] of length 100.
testing = np.arange(100) % 2 == 0
training = ~testing
31 | 30 |
|
# Fit on the training half only, then score both halves; the gap
# between the two accuracies is the point of this demonstration.
train_features = features[training]
train_targets = is_virginica[training]
model = fit_model(train_features, train_targets)
train_accuracy = accuracy(train_features, train_targets, model)
test_accuracy = accuracy(features[testing], is_virginica[testing], model)
35 | 34 |
|
36 | 35 | print('''\ |
37 | 36 | Training accuracy was {0:.1%}. |
|
0 commit comments