11#!/usr/bin/env python 
22#-*-coding:utf-8-*- 
33import  random   
4+ import  math 
# Accumulate the per-category counts.
56def  addValueToMat (theMat ,key ,value ,incr ):  
67    if  key  not  in theMat : #如果key没出先在theMat中   
@@ -16,6 +17,7 @@ def addValueToMat(theMat,key,value,incr):
# Module-level count tables. InitStat fills each one through
# addValueToMat(table, outer_key, inner_key, 1).

# Keyed by tag, then by item.
tag_items = dict()
# Keyed by user, then by item.
user_items = dict()
# Keyed by user, then by item; holds the test-set records.
user_items_test = dict()
# Keyed by item, then by tag; consumed by the diversity measurement.
item_tags = dict()
1921
# Initialization: gather the various statistics.
2123def  InitStat ():  
@@ -29,7 +31,8 @@ def InitStat():
2931            tag = terms [2 ];  
3032            addValueToMat (user_tags ,user ,tag ,1 )  
3133            addValueToMat (tag_items ,tag ,item ,1 )  
32-             addValueToMat (user_items ,user ,item ,1 )  
34+             addValueToMat (user_items ,user ,item ,1 )
35+             addValueToMat (item_tags ,item ,tag ,1 )  
3336            line  =  data_file .readline ();  
3437        else :  
3538            addValueToMat (user_items_test ,user ,item ,1 )  
@@ -48,8 +51,56 @@ def Recommend(usr):
4851                    recommend_list [item_ ]+= wut * wit ;  
4952    return  sorted (recommend_list .iteritems (), key = lambda  a :a [1 ],reverse = True )
5053
# Tally tag popularity.
def TagPopularity():
    """Rank tags by the number of users who have used them.

    Reads the module-level ``user_tags`` table (user -> {tag: count}).
    Each user contributes at most 1 to a tag, no matter how many times
    that user applied it.

    Returns:
        A list of ``(tag, user_count)`` tuples sorted by ``user_count``
        in descending order.
    """
    tagfreq = {}
    for tags in user_tags.values():
        for tag in tags:
            tagfreq[tag] = tagfreq.get(tag, 0) + 1
    # items() rather than iteritems(): the latter exists only on Python 2.
    return sorted(tagfreq.items(), key=lambda pair: pair[1], reverse=True)
64+ 
# Compute cosine similarity.
def CosineSim(item_tags, i, j):
    """Return the cosine similarity of items ``i`` and ``j`` over their tags.

    Args:
        item_tags: mapping of item -> {tag: weight}.
        i: key of the first item.
        j: key of the second item.

    Returns:
        The dot product of the two tag-weight vectors divided by the
        square root of the product of their squared norms, as a float.
        Returns 0.0 when the dot product is zero, which covers items that
        share no tag and items that have no entry in ``item_tags``.
    """
    # An item without an entry is treated as having no tags instead of
    # raising KeyError.
    tags_i = item_tags.get(i) or {}
    tags_j = item_tags.get(j) or {}

    # Dot product over the tags both items carry.
    dot = 0
    for tag, weight in tags_i.items():
        if tag in tags_j:
            dot += weight * tags_j[tag]
    if dot == 0:
        # Also keeps the division below away from a zero denominator.
        return 0.0

    # Squared Euclidean norms of the two tag-weight vectors.
    norm_i = sum(w * w for w in tags_i.values())
    norm_j = sum(w * w for w in tags_j.values())
    return dot / math.sqrt(norm_i * norm_j)
80+ 
# Compute the diversity of a recommendation list.
def Diversity(item_tags, recommend_items):
    """Return the mean pairwise cosine similarity of the recommended items.

    Args:
        item_tags: mapping of item -> {tag: weight}, passed to CosineSim.
        recommend_items: anything ``dict()`` accepts whose keys are the
            items, e.g. a list of ``(item, score)`` pairs. Duplicate
            items are counted once.

    Returns:
        The average of ``CosineSim`` over all pairs of distinct items, as
        a float. Returns 0.0 when there are fewer than two distinct items,
        because there is no pair to average over.
    """
    # Build the de-duplicated item list once instead of on every iteration.
    items = list(dict(recommend_items))
    count = len(items)
    if count < 2:
        # No pairs: avoid dividing by zero.
        return 0.0

    # The similarity is symmetric, so each unordered pair is evaluated
    # once and the mean is taken over count * (count - 1) / 2 pairs.
    total = 0.0
    for index, first in enumerate(items):
        for second in items[index + 1:]:
            total += CosineSim(item_tags, first, second)
    return total / (count * (count - 1) / 2.0)
92+ 
# Driver: load the statistics, then report recommendations, tag popularity
# and list diversity for one user.
InitStat()
recommend_list = Recommend("48411")

# The ten items with the highest interest score.
for recommend in recommend_list[:10]:
    print(recommend)

# The twenty most popular tags.
tagFreq = TagPopularity()
for tag in tagFreq[:20]:
    print(tag)

# Diversity of the full recommendation list; this takes a long time to compute.
diversityNum = Diversity(item_tags, recommend_list)
print(diversityNum)
0 commit comments