# -*- coding: utf-8 -*-
'''
Created on May 10, 2016

@author: Gamer Think
'''
__author__ = "thinkgamer"

from numpy import *

# Load the simple example dataset
def loadSimData():
    datMat = matrix([[1.0 , 2.1],
                     [2.  , 1.1],
                     [1.3 , 1. ],
                     [1.  , 1. ],
                     [2.  , 1. ]])

    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return datMat,classLabels

# Decision stump (single-level decision tree) generation functions
# Classify the data on one feature (dimen) against a threshold (threshVal);
# threshInsq selects which side of the threshold is labeled -1
def stumpClassify(dataMatrix, dimen, threshVal, threshInsq):
    retArray = ones((shape(dataMatrix)[0],1))
    if threshInsq == 'lt':
        retArray[dataMatrix[:,dimen] <= threshVal] = -1.0
    else:
        retArray[dataMatrix[:,dimen] > threshVal] = -1.0
    return retArray
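
# Illustration: on the sample data above, splitting on feature 0 with threshold 1.3 and
# inequality 'lt' marks every point whose first coordinate is <= 1.3 as -1, e.g.
#     datMat, classLabels = loadSimData()
#     print stumpClassify(datMat, 0, 1.3, 'lt').T   # expected: [[-1.  1. -1. -1.  1.]]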

def buildStump(dataArr, classLabels, D):
    dataMatrix = mat(dataArr)
    # a matrix must be 2-D, while a numpy array can be multi-dimensional
    labelMat = mat(classLabels).T   # .T gives the matrix transpose
    m,n = shape(dataMatrix)         # number of rows and columns of the dataset
    numSteps = 10.0     # number of steps used to walk over each feature's range of values
    bestStump = {}      # dictionary storing the best decision stump found for the given weight vector D
    bestClassEnt = mat(zeros((m,1)))
    minError = inf      # initialize minError to positive infinity
    for i in range(n):
        rangeMin = dataMatrix[:,i].min()
        rangeMax = dataMatrix[:,i].max()
        stepSize = (rangeMax-rangeMin)/numSteps
        for j in range(-1,int(numSteps)+1):
            # lt: less than; lte, le: less than or equal
            # gt: greater than; gte, ge: greater than or equal
            # eq: equal; ne, neq: not equal
            for inequal in ['lt','gt']:
                threshVal = (rangeMin + float(j) * stepSize)
                predictedVals = stumpClassify(dataMatrix,i,threshVal, inequal)
                errArr = mat(ones((m,1)))
                errArr[predictedVals==labelMat]=0
                weightedError = D.T * errArr    # compute the weighted error rate
#                 print "split: dim %d, thresh % .2f, thresh inequal: %s, the weighted error is %.3f" % (i, threshVal,inequal,weightedError)
                # update the best decision stump found so far
                if weightedError < minError:
                    minError = weightedError
                    bestClassEnt = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal

    return bestStump,minError,bestClassEnt
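
# For reference, the weighted error minimized above is
#     epsilon = sum_i D_i * I(prediction_i != y_i)
# With uniform weights on the toy dataset, the best stump should split dim 0 at a
# threshold of about 1.3 with 'lt' and a weighted error of 0.2 (one misclassified point);
# see the commented-out buildStump call in the main block below.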

# AdaBoost training based on decision stumps
# numIt: number of iterations, default 40
def adaBoostTrainDS(dataArr,classLabels,numIt=40):
    weakClassArr = []
    m = shape(dataArr)[0]
    D = mat(ones((m,1))/m)
    aggClassEst = mat(zeros((m,1)))
    # boosting iterations
    for i in range(numIt):
        # build the best decision stump for the current weights
        bestStump,error,classEst = buildStump(dataArr, classLabels, D)
        print "D:",D.T  # print the transpose of D
        alpha = float(0.5 * log((1.0 - error) / max(error,1e-16)))  # max(error,1e-16) avoids a divide-by-zero when there is no error
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        print "classEst:",classEst.T
        # compute D for the next iteration
        expon = multiply(-1 * alpha * mat(classLabels).T,classEst)
        D = multiply(D,exp(expon))
        D = D / D.sum()
        # accumulate the class estimates and compute the aggregate error rate
        aggClassEst += alpha * classEst
        print "aggClassEst:",aggClassEst.T
        aggErrors = multiply(sign(aggClassEst) != mat(classLabels).T, ones((m,1)))
        errorRate = aggErrors.sum()/m
        print "total error:",errorRate
        # stop early once the training error reaches zero
        if errorRate == 0.0:
            break
    return weakClassArr
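
# For reference, the updates above are the standard AdaBoost rules
#     alpha = 0.5 * ln((1 - epsilon) / epsilon)
#     D_i  <-  D_i * exp(-alpha * y_i * h(x_i)) / Z      (Z normalizes D to sum to 1)
# so correctly classified samples get their weight decreased and misclassified samples
# get it increased before the next stump is built.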

# AdaBoost classification function
# Inputs: datToClass, the samples to classify, and classifierArr, the list of trained weak classifiers
def adaClassify(datToClass,classifierArr):
    dataMatrix = mat(datToClass)
    m = shape(dataMatrix)[0]
    aggClassEst = mat(zeros((m,1)))
    for i in range(len(classifierArr)):
        classEst = stumpClassify(dataMatrix,classifierArr[i]['dim'],\
                                 classifierArr[i]['thresh'],\
                                 classifierArr[i]['ineq'])
        aggClassEst += classifierArr[i]['alpha'] * classEst
        print aggClassEst
    return sign(aggClassEst)


# main
if __name__=="__main__":
    # load the dataset
    datMat,classLabels = loadSimData()
#     print "datMat:",datMat
#     print "classLabels:",classLabels

    # build a single decision stump
#     D = mat(ones((5,1))/5)
#     print buildStump(datMat, classLabels, D)

    # AdaBoost training based on decision stumps
    classifierArray = adaBoostTrainDS(datMat, classLabels, 30)
#     for classifier in classifierArray:
#         print classifier

    # test the AdaBoost classification function
    print "[0,0]:\n",adaClassify([0,0], classifierArray)
    print "\n\n[[5,5],[0,0]]:\n",adaClassify([[5,5],[0,0]], classifierArray)
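
# For reference, training on this toy dataset should reach zero training error after three
# rounds, and the two test calls should end with sign -1 for [0,0] and [+1, -1] for
# [[5,5],[0,0]].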