@@ -116,7 +116,7 @@ def setproblem(self, target, inputs=None, exclude=()):
116116 self .check_me ()
117117
118118 def check_me (self ):
119- "Check that my fields make sense."
119+ """Check that my fields make sense."""
120120 assert len (self .attrnames ) == len (self .attrs )
121121 assert self .target in self .attrs
122122 assert self .target not in self .inputs
@@ -126,20 +126,20 @@ def check_me(self):
126126 list (map (self .check_example , self .examples ))
127127
128128 def add_example (self , example ):
129- "Add an example to the list of examples, checking it first."
129+ """Add an example to the list of examples, checking it first."""
130130 self .check_example (example )
131131 self .examples .append (example )
132132
133133 def check_example (self , example ):
134- "Raise ValueError if example has any invalid values."
134+ """Raise ValueError if example has any invalid values."""
135135 if self .values :
136136 for a in self .attrs :
137137 if example [a ] not in self .values [a ]:
138138 raise ValueError ('Bad value {} for attribute {} in {}'
139139 .format (example [a ], self .attrnames [a ], example ))
140140
141141 def attrnum (self , attr ):
142- "Returns the number used for attr, which can be a name, or -n .. n-1."
142+ """Returns the number used for attr, which can be a name, or -n .. n-1."""
143143 if isinstance (attr , str ):
144144 return self .attrnames .index (attr )
145145 elif attr < 0 :
@@ -148,7 +148,7 @@ def attrnum(self, attr):
148148 return attr
149149
150150 def sanitize (self , example ):
151- "Return a copy of example, with non-input attributes replaced by None."
151+ """Return a copy of example, with non-input attributes replaced by None."""
152152 return [attr_i if i in self .inputs else None
153153 for i , attr_i in enumerate (example )]
154154
@@ -161,12 +161,11 @@ def __repr__(self):
161161
162162def parse_csv (input , delim = ',' ):
163163 r"""Input is a string consisting of lines, each line has comma-delimited
164- fields. Convert this into a list of lists. Blank lines are skipped.
164+ fields. Convert this into a list of lists. Blank lines are skipped.
165165 Fields that look like numbers are converted to numbers.
166166 The delim defaults to ',' but '\t' and None are also reasonable values.
167167 >>> parse_csv('1, 2, 3 \n 0, 2, na')
168- [[1, 2, 3], [0, 2, 'na']]
169- """
168+ [[1, 2, 3], [0, 2, 'na']]"""
170169 lines = [line for line in input .splitlines () if line .strip ()]
171170 return [list (map (num_or_str , line .split (delim ))) for line in lines ]
172171
@@ -195,7 +194,7 @@ def __init__(self, observations=[], default=0):
195194 self .add (o )
196195
197196 def add (self , o ):
198- "Add an observation o to the distribution."
197+ """Add an observation o to the distribution."""
199198 self .smooth_for (o )
200199 self .dictionary [o ] += 1
201200 self .n_obs += 1
@@ -210,18 +209,18 @@ def smooth_for(self, o):
210209 self .sampler = None
211210
212211 def __getitem__ (self , item ):
213- "Return an estimate of the probability of item."
212+ """Return an estimate of the probability of item."""
214213 self .smooth_for (item )
215214 return self .dictionary [item ] / self .n_obs
216215
217216 # (top() and sample() are not used in this module, but elsewhere.)
218217
219218 def top (self , n ):
220- "Return (count, obs) tuples for the n most frequent observations."
219+ """Return (count, obs) tuples for the n most frequent observations."""
221220 return heapq .nlargest (n , [(v , k ) for (k , v ) in self .dictionary .items ()])
222221
223222 def sample (self ):
224- "Return a random sample from the distribution."
223+ """Return a random sample from the distribution."""
225224 if self .sampler is None :
226225 self .sampler = weighted_sampler (list (self .dictionary .keys ()),
227226 list (self .dictionary .values ()))
@@ -236,7 +235,7 @@ def PluralityLearner(dataset):
236235 most_popular = mode ([e [dataset .target ] for e in dataset .examples ])
237236
238237 def predict (example ):
239- "Always return same result: the most popular from the training set."
238+ """Always return same result: the most popular from the training set."""
240239 return most_popular
241240 return predict
242241
@@ -274,9 +273,9 @@ def class_probability(targetval):
274273
275274
276275def NearestNeighborLearner (dataset , k = 1 ):
277- "k-NearestNeighbor: the k nearest neighbors vote."
276+ """k-NearestNeighbor: the k nearest neighbors vote."""
278277 def predict (example ):
279- "Find the k closest, and have them vote for the best."
278+ """Find the k closest items, and have them vote for the best."""
280279 best = heapq .nsmallest (k , ((dataset .distance (e , example ), e )
281280 for e in dataset .examples ))
282281 return mode (e [dataset .target ] for (d , e ) in best )
@@ -291,18 +290,18 @@ class DecisionFork:
291290 of branches, one for each of the attribute's values."""
292291
293292 def __init__ (self , attr , attrname = None , branches = None ):
294- "Initialize by saying what attribute this node tests."
293+ """Initialize by saying what attribute this node tests."""
295294 self .attr = attr
296295 self .attrname = attrname or attr
297296 self .branches = branches or {}
298297
299298 def __call__ (self , example ):
300- "Given an example, classify it using the attribute and the branches."
299+ """Given an example, classify it using the attribute and the branches."""
301300 attrvalue = example [self .attr ]
302301 return self .branches [attrvalue ](example )
303302
304303 def add (self , val , subtree ):
305- "Add a branch. If self.attr = val, go to the given subtree."
304+ """Add a branch. If self.attr = val, go to the given subtree."""
306305 self .branches [val ] = subtree
307306
308307 def display (self , indent = 0 ):
@@ -319,7 +318,7 @@ def __repr__(self):
319318
320319class DecisionLeaf :
321320
322- "A leaf of a decision tree holds just a result."
321+ """A leaf of a decision tree holds just a result."""
323322
324323 def __init__ (self , result ):
325324 self .result = result
@@ -337,7 +336,7 @@ def __repr__(self):
337336
338337
339338def DecisionTreeLearner (dataset ):
340- "[Figure 18.5]"
339+ """[Figure 18.5]"""
341340
342341 target , values = dataset .target , dataset .values
343342
@@ -365,21 +364,21 @@ def plurality_value(examples):
365364 return DecisionLeaf (popular )
366365
367366 def count (attr , val , examples ):
368- "Count the number of examples that have attr = val."
367+ """Count the number of examples that have attr = val."""
369368 return len (e [attr ] == val for e in examples ) #count(e[attr] == val for e in examples)
370369
371370 def all_same_class (examples ):
372- "Are all these examples in the same target class?"
371+ """Are all these examples in the same target class?"""
373372 class0 = examples [0 ][target ]
374373 return all (e [target ] == class0 for e in examples )
375374
376375 def choose_attribute (attrs , examples ):
377- "Choose the attribute with the highest information gain."
376+ """Choose the attribute with the highest information gain."""
378377 return argmax_random_tie (attrs ,
379378 key = lambda a : information_gain (a , examples ))
380379
381380 def information_gain (attr , examples ):
382- "Return the expected reduction in entropy from splitting by attr."
381+ """Return the expected reduction in entropy from splitting by attr."""
383382 def I (examples ):
384383 return information_content ([count (target , v , examples )
385384 for v in values [target ]])
@@ -389,15 +388,15 @@ def I(examples):
389388 return I (examples ) - remainder
390389
391390 def split_by (attr , examples ):
392- "Return a list of (val, examples) pairs for each val of attr."
391+ """Return a list of (val, examples) pairs for each val of attr."""
393392 return [(v , [e for e in examples if e [attr ] == v ])
394393 for v in values [attr ]]
395394
396395 return decision_tree_learning (dataset .examples , dataset .inputs )
397396
398397
399398def information_content (values ):
400- "Number of bits to represent the probability distribution in values."
399+ """Number of bits to represent the probability distribution in values."""
401400 probabilities = normalize (removeall (0 , values ))
402401 return sum (- p * math .log2 (p ) for p in probabilities )
403402
@@ -423,11 +422,11 @@ def find_examples(examples):
423422 raise NotImplementedError
424423
425424 def passes (example , test ):
426- "Does the example pass the test?"
425+ """Does the example pass the test?"""
427426 raise NotImplementedError
428427
429428 def predict (example ):
430- "Predict the outcome for the first passing test."
429+ """Predict the outcome for the first passing test."""
431430 for test , outcome in predict .decision_list :
432431 if passes (example , test ):
433432 return outcome
@@ -443,7 +442,7 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=[3],
443442 """
444443 Layered feed-forward network.
445444 hidden_layer_sizes: List of number of hidden units per hidden layer
446- learning_rate: Learning rate of gradient decent
445+ learning_rate: Learning rate of gradient descent
447446 epoches: Number of passes over the dataset
448447 """
449448
@@ -483,7 +482,7 @@ class NNUnit:
483482 """
484483 Single Unit of Multiple Layer Neural Network
485484 inputs: Incoming connections
486- weights: weights to incoming connections
485+ weights: Weights to incoming connections
487486 """
488487
489488 def __init__ (self , weights = None , inputs = None ):
@@ -496,7 +495,7 @@ def __init__(self, weights=None, inputs=None):
496495def network (input_units , hidden_layer_sizes , output_units ):
497496 """
498497 Create Directed Acyclic Network of given number layers.
499- hidden_layers_sizes : list number of neuron units in each hidden layer
498+ hidden_layer_sizes: List of the number of neuron units in each hidden layer
500499 excluding input and output layers
501500 """
502501 # Check for PerceptronLearner
@@ -623,8 +622,8 @@ def predict(example):
623622# ______________________________________________________________________________
624623
625624
626- def Linearlearner (dataset , learning_rate = 0.01 , epochs = 100 ):
627- """Define with learner = Linearlearner (data); infer with learner(x)."""
625+ def LinearLearner (dataset , learning_rate = 0.01 , epochs = 100 ):
626+ """Define with learner = LinearLearner(data); infer with learner(x)."""
628627 idx_i = dataset .inputs
629628 idx_t = dataset .target # As of now, dataset.target gives only one index.
630629 examples = dataset .examples
@@ -698,7 +697,7 @@ def train(dataset):
698697
699698
700699def WeightedMajority (predictors , weights ):
701- "Return a predictor that takes a weighted vote."
700+ """Return a predictor that takes a weighted vote."""
702701 def predict (example ):
703702 return weighted_mode ((predictor (example ) for predictor in predictors ),
704703 weights )
@@ -708,7 +707,8 @@ def predict(example):
708707def weighted_mode (values , weights ):
709708 """Return the value with the greatest total weight.
710709 >>> weighted_mode('abbaa', [1,2,3,1,2])
711- 'b'"""
710+ 'b'
711+ """
712712 totals = defaultdict (int )
713713 for v , w in zip (values , weights ):
714714 totals [v ] += w
@@ -727,7 +727,7 @@ def train(dataset, weights):
727727
728728
729729def replicated_dataset (dataset , weights , n = None ):
730- "Copy dataset, replicating each example in proportion to its weight."
730+ """Copy dataset, replicating each example in proportion to its weight."""
731731 n = n or len (dataset .examples )
732732 result = copy .copy (dataset )
733733 result .examples = weighted_replicate (dataset .examples , weights , n )
@@ -739,7 +739,8 @@ def weighted_replicate(seq, weights, n):
739739 seq proportional to the corresponding weight (filling in fractions
740740 randomly).
741741 >>> weighted_replicate('ABC', [1,2,1], 4)
742- ['A', 'B', 'B', 'C']"""
742+ ['A', 'B', 'B', 'C']
743+ """
743744 assert len (seq ) == len (weights )
744745 weights = normalize (weights )
745746 wholes = [int (w * n ) for w in weights ]
@@ -755,7 +756,7 @@ def flatten(seqs): return sum(seqs, [])
755756
756757
757758def test (predict , dataset , examples = None , verbose = 0 ):
758- "Return the proportion of the examples that are NOT correctly predicted."
759+ """Return the proportion of the examples that are NOT correctly predicted."""
759760 if examples is None :
760761 examples = dataset .examples
761762 if len (examples ) == 0 :
@@ -787,7 +788,7 @@ def train_and_test(dataset, start, end):
787788def cross_validation (learner , size , dataset , k = 10 , trials = 1 ):
788789 """Do k-fold cross_validate and return their mean.
789790 That is, keep out 1/k of the examples for testing on each of k runs.
790- Shuffle the examples first; If trials>1, average over several shuffles.
791+ Shuffle the examples first; if trials>1, average over several shuffles.
791792 Returns Training error, Validation error"""
792793 if k is None :
793794 k = len (dataset .examples )
@@ -820,11 +821,11 @@ def cross_validation(learner, size, dataset, k=10, trials=1):
820821
821822def cross_validation_wrapper (learner , dataset , k = 10 , trials = 1 ):
822823 """
823- Fig 18.8
824+ [Fig 18.8]
824825 Return the optimal value of size having minimum error
825826 on validation set.
826- err_train: a training error array, indexed by size
827- err_val: a validataion error array, indexed by size
827+ err_train: A training error array, indexed by size
828+ err_val: A validation error array, indexed by size
828829 """
829830 err_val = []
830831 err_train = []
@@ -843,7 +844,7 @@ def cross_validation_wrapper(learner, dataset, k=10, trials=1):
843844
844845
845846def leave_one_out (learner , dataset ):
846- "Leave one out cross-validation over the dataset."
847+ """Leave one out cross-validation over the dataset."""
847848 return cross_validation (learner , size , dataset , k = len (dataset .examples ))
848849
849850
@@ -878,7 +879,7 @@ def score(learner, size):
878879
879880
880881def RestaurantDataSet (examples = None ):
881- "Build a DataSet of Restaurant waiting examples. [Figure 18.3]"
882+ """Build a DataSet of Restaurant waiting examples. [Figure 18.3]"""
882883 return DataSet (name = 'restaurant' , target = 'Wait' , examples = examples ,
883884 attrnames = 'Alternate Bar Fri/Sat Hungry Patrons Price ' +
884885 'Raining Reservation Type WaitEstimate Wait' )
@@ -917,7 +918,7 @@ def T(attrname, branches):
917918
918919
919920def SyntheticRestaurant (n = 20 ):
920- "Generate a DataSet with n examples."
921+ """Generate a DataSet with n examples."""
921922 def gen ():
922923 example = list (map (random .choice , restaurant .values ))
923924 example [restaurant .target ] = waiting_decision_tree (example )
0 commit comments