1+ #-*-coding:utf8-*-
2+ '''
3+ Created on 2016-5-15
4+
5+ @author: thinkgamer
6+ '''
7+ from numpy import *
8+
9+ def loadDataSet (filename ,delim = "\t " ):
10+ fr = open (filename )
11+ stringArr = [line .strip ().split (delim ) for line in fr .readlines ()]
12+ datArr = [map (float , line ) for line in stringArr ]
13+ return mat (datArr )
14+
# dataMat is the data set: one sample per row, one feature per column
def pca(dataMat, topNfeat=9999999):
    """Project a data set onto its leading principal components.

    Args:
        dataMat: the data as a NumPy matrix; the mean and the covariance
            are taken column-wise (``axis=0`` / ``rowvar=0``).
        topNfeat: number of principal components to keep. The default is
            large enough to keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)``: the mean-centred data expressed
        in the retained components, and the data reconstructed from that
        projection with the column means added back.
    """
    meanVals = mean(dataMat, axis=0)           # column means
    meanRemoved = dataMat - meanVals           # centre the data
    covMat = cov(meanRemoved, rowvar=0)        # covariance matrix of the features
    # A covariance matrix is symmetric, so use the symmetric solver: it always
    # returns real eigenvalues/eigenvectors, whereas the general ``eig`` can
    # return complex values caused by rounding. ``asmatrix`` replaces ``mat``,
    # which NumPy 2.0 no longer exports. The sign of each eigenvector is
    # arbitrary, so projected coordinates are defined up to sign.
    eigVals, eigVects = linalg.eigh(asmatrix(covMat))
    eigValInd = argsort(eigVals)               # indices in ascending eigenvalue order
    # Walk backwards from the end to take the topNfeat largest eigenvalues,
    # largest first.
    eigValInd = eigValInd[:-(topNfeat + 1):-1]
    redEigVects = eigVects[:, eigValInd]
    # Transform the data into the new space
    lowDDataMat = meanRemoved * redEigVects
    reconMat = (lowDDataMat * redEigVects.T) + meanVals
    return lowDDataMat, reconMat
29+
# Test: reduce the sample data set to its first principal component
dataMat = loadDataSet("testSet.txt")
lowDMat, reconMat = pca(dataMat, 1)
# The parenthesised single-argument form prints the same text on Python 2
# and is also valid on Python 3 (the bare print statement is not).
print(shape(lowDMat))

# Optional plot of the original points against the reconstructed ones.
# It is disabled by being kept inside a string literal.
'''
#show
import matplotlib
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(dataMat[:,0].flatten().A[0], dataMat[:,1].flatten().A[0], marker='^', s = 90 )
ax.scatter(reconMat[:,0].flatten().A[0], reconMat[:,1].flatten().A[0],marker='o', s = 50 , c ='red' )
plt.show()
'''
45+
# Replace NaN entries with the mean of their column
def replaceNanWithMean(filename='secom.data', delim=' '):
    """Load a data file and replace every NaN with its column mean.

    Args:
        filename: path of the data file, forwarded to ``loadDataSet``.
            Defaults to the file the original code hard-coded.
        delim: field separator, forwarded to ``loadDataSet``.

    Returns:
        The loaded matrix with each NaN replaced by the mean of the non-NaN
        values in the same column. A column that contains only NaN is left
        unchanged, because it has no values to average.
    """
    datMat = loadDataSet(filename, delim)
    numFeat = shape(datMat)[1]
    for i in range(numFeat):
        # Compute the NaN mask once per column; ``.A`` gives an (n, 1)
        # ndarray, ``[:, 0]`` flattens it to one boolean per row.
        nanMask = isnan(datMat[:, i].A)[:, 0]
        # Nothing to replace, or nothing to average: leave the column as is.
        if not nanMask.any() or nanMask.all():
            continue
        meanVal = mean(datMat[nonzero(~nanMask)[0], i])  # mean of the non-NaN values
        datMat[nonzero(nanMask)[0], i] = meanVal         # set NaN values to that mean
    return datMat
54+
# Load the data, with NaN values already replaced by column means
dataMat = replaceNanWithMean()
# Remove the mean
meanVals = mean(dataMat, axis=0)
meanRemoved = dataMat - meanVals
# Compute the covariance matrix
covMat = cov(meanRemoved, rowvar=0)

# Eigenvalue analysis. The covariance matrix is symmetric, so the symmetric
# solver is used: it returns real eigenvalues, in ascending order.
# ``asmatrix`` replaces ``mat``, which NumPy 2.0 no longer exports.
eigVals, eigVects = linalg.eigh(asmatrix(covMat))
# Parenthesised form works on both Python 2 and Python 3.
print(eigVals)
0 commit comments