@@ -271,6 +271,37 @@ def manhattan_distances(X, Y=None, sum_over_features=True,
271271    return  D 
272272
273273
def cosine_distances(X, Y=None):
    """
    Compute cosine distance between samples in X and Y.

    Cosine distance is defined as 1.0 minus the cosine similarity.

    Parameters
    ----------
    X : array_like, sparse matrix
        with shape (n_samples_X, n_features).

    Y : array_like, sparse matrix (optional)
        with shape (n_samples_Y, n_features). Passed through unchanged to
        cosine_similarity.

    Returns
    -------
    distance matrix : array_like
        An array with shape (n_samples_X, n_samples_Y). Entries are
        clamped to the interval [0, 2].

    See also
    --------
    sklearn.metrics.pairwise.cosine_similarity
    scipy.spatial.distance.cosine (dense matrices only)
    """
    # 1.0 - cosine_similarity(X, Y), done in place so that no second
    # (n_samples_X, n_samples_Y) array has to be allocated.
    S = cosine_similarity(X, Y)
    S *= -1
    S += 1
    # Floating-point rounding can leave entries marginally below 0 (or above
    # 2); clamp in place so callers never see a negative "distance".
    # NOTE(review): assumes cosine_similarity returns a NumPy array, which
    # the in-place arithmetic above already relies on -- confirm.
    S.clip(0, 2, out=S)
    return S
303+ 
304+ 
274305# Kernels 
275306def  linear_kernel (X , Y = None ):
276307    """ 
@@ -525,11 +556,12 @@ def chi2_kernel(X, Y=None, gamma=1.):
# Lookup table from metric name to the function implementing it.
# If updating this dictionary, update the doc in both distance_metrics()
# and also in pairwise_distances()!
PAIRWISE_DISTANCE_FUNCTIONS = {
    'cityblock': manhattan_distances,
    'cosine': cosine_distances,
    'euclidean': euclidean_distances,
    'l2': euclidean_distances,
    'l1': manhattan_distances,
    'manhattan': manhattan_distances,
}
533565
534566
535567def  distance_metrics ():
@@ -545,6 +577,7 @@ def distance_metrics():
545577    metric           Function 
546578    ============     ==================================== 
547579    'cityblock'      metrics.pairwise.manhattan_distances 
580+     'cosine'         metrics.pairwise.cosine_distances 
548581    'euclidean'      metrics.pairwise.euclidean_distances 
549582    'l1'             metrics.pairwise.manhattan_distances 
550583    'l2'             metrics.pairwise.euclidean_distances 
@@ -585,25 +618,27 @@ def pairwise_distances(X, Y=None, metric="euclidean", n_jobs=1, **kwds):
585618    If Y is given (default is None), then the returned matrix is the pairwise 
586619    distance between the arrays from both X and Y. 
587620
588-     Please note that support for sparse matrices is currently limited to those  
589-     metrics listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS . 
621+     Please note that support for sparse matrices is currently limited to
622+     'euclidean', 'l2' and 'cosine'.
590623
591624    Valid values for metric are: 
592625
593-     - from scikit-learn: ['euclidean', 'l2', 'l1', 'manhattan', 'cityblock'] 
626+     - from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',  
627+       'manhattan']  
594628
595629    - from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev', 
596-       'correlation', 'cosine', ' dice', 'hamming', 'jaccard', 'kulsinski', 
597-       'mahalanobis', ' matching', 'minkowski', 'rogerstanimoto', 'russellrao', 
598-       'seuclidean', ' sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'] 
630+       'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
631+       'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
632+       'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule']
599633      See the documentation for scipy.spatial.distance for details on these 
600634      metrics. 
601635
602-     Note in the case of 'euclidean' and 'cityblock' (which are valid 
603-     scipy.spatial.distance metrics), the values will use the scikit-learn 
604-     implementation, which is faster and has support for sparse matrices. 
605-     For a verbose description of the metrics from scikit-learn, see the 
606-     __doc__ of the sklearn.pairwise.distance_metrics function. 
636+     Note that in the case of 'cityblock', 'cosine' and 'euclidean' (which are 
637+     valid scipy.spatial.distance metrics), the scikit-learn implementation 
638+     will be used, which is faster and has support for sparse matrices (except 
639+     for 'cityblock'). For a verbose description of the metrics from
640+     scikit-learn, see the __doc__ of the
641+     sklearn.metrics.pairwise.distance_metrics function.
607642
608643    Parameters 
609644    ---------- 
0 commit comments