11#!/usr/bin/env python
22#-*-coding:utf-8-*-
33import random
4+ import math
# Tally the counts for each category
56def addValueToMat (theMat ,key ,value ,incr ):
67 if key not in theMat : #如果key没出先在theMat中
@@ -16,6 +17,7 @@ def addValueToMat(theMat,key,value,incr):
# Module-level lookup tables; they start empty and are filled by InitStat().
tag_items = {}
user_items = {}
user_items_test = {}  # test-set data dictionary
item_tags = {}  # used for the diversity measurement
1921
# Initialization: gather the various statistics
2123def InitStat ():
@@ -29,7 +31,8 @@ def InitStat():
2931 tag = terms [2 ];
3032 addValueToMat (user_tags ,user ,tag ,1 )
3133 addValueToMat (tag_items ,tag ,item ,1 )
32- addValueToMat (user_items ,user ,item ,1 )
34+ addValueToMat (user_items ,user ,item ,1 )
35+ addValueToMat (item_tags ,item ,tag ,1 )
3336 line = data_file .readline ();
3437 else :
3538 addValueToMat (user_items_test ,user ,item ,1 )
@@ -48,8 +51,56 @@ def Recommend(usr):
4851 recommend_list [item_ ]+= wut * wit ;
4952 return sorted (recommend_list .iteritems (), key = lambda a :a [1 ],reverse = True )
5053
def TagPopularity():
    """Rank tags by the number of users who have applied them.

    Reads the module-level ``user_tags`` nested mapping (keyed by user, then
    by tag).  Each tag is counted once per user that has it; the values
    stored under each tag are not consulted.

    Returns:
        A list of ``(tag, user_count)`` tuples sorted by ``user_count`` in
        descending order.
    """
    tagfreq = {}
    for tags in user_tags.values():
        for tag in tags:
            tagfreq[tag] = tagfreq.get(tag, 0) + 1
    # items() is available on both Python 2 and Python 3, unlike iteritems().
    return sorted(tagfreq.items(), key=lambda a: a[1], reverse=True)

def CosineSim(item_tags, i, j):
    """Return the cosine similarity between the tag vectors of two items.

    Args:
        item_tags: nested mapping ``item -> {tag: weight}``.
        i: key of the first item.
        j: key of the second item.

    Returns:
        ``0`` when the dot product of the two tag vectors is zero (for
        example when the items share no tag, or when either item is absent
        from ``item_tags``); otherwise the dot product divided by the
        product of the two Euclidean norms, as a float.
    """
    # An item without tag data is treated as an empty vector instead of
    # raising KeyError.
    tags_i = item_tags.get(i, {})
    tags_j = item_tags.get(j, {})

    # Dot product over the tags the two items have in common.
    dot = 0
    for tag, weight in tags_i.items():
        if tag in tags_j:
            dot += weight * tags_j[tag]
    if dot == 0:
        # Nothing to normalise; this also avoids dividing by a zero norm.
        return 0

    norm_i = sum(w * w for w in tags_i.values())
    norm_j = sum(w * w for w in tags_j.values())
    return dot / math.sqrt(norm_i * norm_j)

def Diversity(item_tags, recommend_items):
    """Return the mean pairwise tag similarity of a recommendation list.

    The value is the average of ``CosineSim`` over every pair of distinct
    items, so a lower result means the recommended items are less alike.

    Args:
        item_tags: nested mapping ``item -> {tag: weight}``, passed through
            to ``CosineSim``.
        recommend_items: anything ``dict()`` accepts whose keys are the
            recommended items, e.g. a list of ``(item, score)`` pairs.

    Returns:
        The average similarity as a float, or ``0.0`` when there are fewer
        than two distinct items (no pair exists to compare).
    """
    # Build the de-duplicated item list once rather than on every iteration.
    items = list(dict(recommend_items))

    total = 0
    pairs = 0
    # Cosine similarity is symmetric, so averaging over unordered pairs gives
    # the same mean as averaging over ordered pairs with half the work.
    for idx, i in enumerate(items):
        for j in items[idx + 1:]:
            total += CosineSim(item_tags, i, j)
            pairs += 1

    if pairs == 0:
        # Guard against dividing by zero for empty or single-item lists.
        return 0.0
    return total / (pairs * 1.0)

# Driver: load the statistics, then report recommendations, tag popularity
# and the recommendation list's diversity score.
InitStat()
recommend_list = Recommend("48411")
for recommend in recommend_list[:10]:  # the ten items with the highest interest score
    print(recommend)

# Tag popularity statistics
tagFreq = TagPopularity()
for tag in tagFreq[:20]:
    print(tag)

# Diversity of the recommendation list; this takes a long time to compute
diversityNum = Diversity(item_tags, recommend_list)
print(diversityNum)
0 commit comments