|
4 | 4 | removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian, |
5 | 5 | dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement, |
6 | 6 | weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table, |
7 | | - open_data, sigmoid_derivative |
| 7 | + open_data, sigmoid_derivative, probability |
8 | 8 | ) |
9 | 9 |
|
10 | 10 | import copy |
@@ -493,6 +493,33 @@ def information_content(values): |
493 | 493 |
|
494 | 494 | # ______________________________________________________________________________ |
495 | 495 |
|
| 496 | + |
def RandomForest(dataset, n=5):
    """An ensemble of decision trees trained using bagging and feature bagging.

    Each of the n trees is trained on a bootstrap sample of the examples
    (bagging) and a random subset of the input attributes (feature bagging).
    Returns a predict(example) function that classifies by majority vote
    over the ensemble.
    """

    def data_bagging(dataset, m=0):
        """Sample m examples with replacement (defaults to all examples)."""
        # Fixed: original referenced a bare `examples` name (NameError).
        n_examples = len(dataset.examples)
        return weighted_sample_with_replacement(m or n_examples,
                                                dataset.examples,
                                                [1] * n_examples)

    def feature_bagging(dataset, p=0.7):
        """Feature bagging with probability p to retain an attribute."""
        inputs = [i for i in dataset.inputs if probability(p)]
        # Guard against the (unlikely) case where every attribute was dropped.
        return inputs or dataset.inputs

    # The helpers must be defined before this comprehension runs; the
    # original built `predictors` first, raising NameError. Also fixed
    # the `datatset` typo in the feature_bagging call.
    predictors = [DecisionTreeLearner(examples=data_bagging(dataset),
                                      attrs=dataset.attrs,
                                      attrnames=dataset.attrnames,
                                      target=dataset.target,
                                      inputs=feature_bagging(dataset))
                  for _ in range(n)]

    def predict(example):
        """Classify example by majority vote of the ensemble's predictions."""
        return mode(predictor(example) for predictor in predictors)

    return predict
| 520 | + |
| 521 | +# ______________________________________________________________________________ |
| 522 | + |
496 | 523 | # A decision list is implemented as a list of (test, value) pairs. |
497 | 524 |
|
498 | 525 |
|
|
0 commit comments