adding Ben's auc code.

CamDavidsonPilon · CamDavidsonPilon · commit 2ff1a85122f1 · 2013-06-09T23:46:24.000-04:00
diff --git a/Chapter7_BayesianMachineLearning/auc.py b/Chapter7_BayesianMachineLearning/auc.py
@@ -0,0 +1,61 @@
+#contributed by Ben Hammer, 2013 
+
+
+def tied_rank(x):
+    """
+    Computes the tied rank of elements in x.
+
+    Ranks are 1-based and ascending; equal values share the mean of the
+    ranks they span. An empty x yields an empty list.
+
+    Parameters
+    ----------
+    x : list of numbers, numpy array
+
+    Returns
+    -------
+    score : list of numbers
+            The tied rank of each element in x
+    """
+    sorted_x = sorted(zip(x, range(len(x))))
+    n = len(sorted_x)
+    r = [0.0] * n
+    start = 0
+    while start < n:
+        # Extend the run of equal values beginning at `start`.
+        end = start + 1
+        while end < n and sorted_x[end][0] == sorted_x[start][0]:
+            end += 1
+        # Slots start..end-1 hold ranks start+1..end; use their mean.
+        for j in range(start, end):
+            r[sorted_x[j][1]] = (start + 1 + end) / 2.0
+        start = end
+    return r
+
+def auc(actual, posterior):
+    """
+    Computes the area under the ROC curve (AUC) for binary classification.
+
+    Raises ValueError if actual and posterior differ in length, and
+    ZeroDivisionError if actual lacks positives or negatives (AUC undefined).
+
+    Parameters
+    ----------
+    actual : list of binary numbers, numpy array
+             The ground truth value
+    posterior : same type as actual
+                Score per element; larger means more likely positive.
+
+    Returns
+    -------
+    score : double
+            The AUC of the ranking that posterior induces on actual
+    """
+    if len(actual) != len(posterior):
+        raise ValueError("actual and posterior must have the same length")
+    r = tied_rank(posterior)
+    num_positive = sum(1 for label in actual if label == 1)
+    num_negative = len(actual) - num_positive
+    sum_positive = sum(rank for rank, label in zip(r, actual) if label == 1)
+    return ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
+            (num_negative * num_positive))