1+ #contributed by Ben Hammer, 2013
2+
3+
def tied_rank(x):
    """
    Computes the tied rank of elements in x.

    Ranks are 1-based and assigned in ascending order of value. Elements
    that compare equal all receive the average of the ranks they jointly
    occupy (e.g. two values tied for ranks 2 and 3 both get 2.5).

    Parameters
    ----------
    x : list of numbers, numpy array

    Returns
    -------
    score : list of floats
            The tied rank of each element in x, in the same order as x.
            An empty input yields an empty list.

    """
    # Pair every value with its original position so that, after sorting,
    # each rank can be written back to the slot the value came from.
    sorted_x = sorted(zip(x, range(len(x))))
    n = len(sorted_x)
    r = [0] * n

    start = 0
    while start < n:
        # Extend [start, end) over the run of values equal to the run's first.
        end = start + 1
        while end < n and sorted_x[end][0] == sorted_x[start][0]:
            end += 1
        # 0-based positions start..end-1 hold 1-based ranks start+1..end;
        # their mean is (start + 1 + end) / 2.
        avg_rank = float(start + 1 + end) / 2.0
        for j in range(start, end):
            r[sorted_x[j][1]] = avg_rank
        start = end
    return r
34+
def auc(actual, posterior):
    """
    Computes the area under the receiver-operating characteristic curve (AUC)

    This function computes the AUC metric for binary classification from
    the tied ranks of the scores: the rank sum of the positive examples,
    minus its minimum possible value, divided by the number of
    (positive, negative) pairs.

    Parameters
    ----------
    actual : list of binary numbers, numpy array
             The ground truth value; entries equal to 1 are counted as
             positive, everything else as negative.
    posterior : same type as actual
                Scores defining a ranking on the examples. Larger values
                mean "more likely to be positive".

    Returns
    -------
    score : double
            The area under the ROC curve for the given scores.

    Raises
    ------
    ZeroDivisionError
        If actual contains no positive or no negative examples, in which
        case the AUC is undefined.

    """
    r = tied_rank(posterior)
    num_positive = sum(1 for label in actual if label == 1)
    num_negative = len(actual) - num_positive
    # Index into actual (rather than zip) so a too-short actual still fails
    # loudly instead of being silently truncated.
    sum_positive = sum(rank for i, rank in enumerate(r) if actual[i] == 1)
    if num_positive == 0 or num_negative == 0:
        raise ZeroDivisionError(
            "AUC is undefined: actual must contain at least one positive "
            "and one negative example")
    # num_positive * (num_positive + 1) / 2 is the smallest rank sum the
    # positives could have (all ranked below every negative).
    return ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
            (num_negative * num_positive))
0 commit comments