Commit 96ceee7

Cast lambda object to list texttiling for python3
Added doctest which catches the current regression across python2 and python3
1 parent 6cf3382 commit 96ceee7

1 file changed: +10 -1 lines changed


nltk/tokenize/texttiling.py

Lines changed: 10 additions & 1 deletion
@@ -52,6 +52,15 @@ class TextTilingTokenizer(TokenizerI):
     :param cutoff_policy: The policy used to determine the number of boundaries:
       `HC` (default) or `LC`
     :type cutoff_policy: constant
+
+    >>> from nltk.corpus import brown
+    >>> tt = TextTilingTokenizer(demo_mode=True)
+    >>> text = brown.raw()[:10000]
+    >>> s, ss, d, b = tt.tokenize(text)
+    >>> b
+    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
+    0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
+    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
     """

     def __init__(self,
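
Not part of the commit, but a quick way to exercise the new doctest locally is a plain doctest run over the module. A minimal sketch, assuming NLTK is installed and the required corpora (brown, plus stopwords for the tokenizer's defaults) have been downloaded; NORMALIZE_WHITESPACE is needed because the expected boundary list is wrapped across several lines in the docstring:

# Sketch: run the texttiling module's doctests, including the one added above.
# Assumes nltk is installed and the needed corpora are available, e.g.
#   nltk.download('brown'); nltk.download('stopwords')
import doctest
import nltk.tokenize.texttiling as texttiling

# NORMALIZE_WHITESPACE lets the wrapped expected list in the docstring
# match the single-line repr that `>>> b` actually prints.
doctest.testmod(texttiling, optionflags=doctest.NORMALIZE_WHITESPACE, verbose=True)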
@@ -284,7 +293,7 @@ def _identify_boundaries(self, depth_scores):

         depth_tuples = sorted(zip(depth_scores, range(len(depth_scores))))
         depth_tuples.reverse()
-        hp = filter(lambda x:x[0]>cutoff, depth_tuples)
+        hp = list(filter(lambda x:x[0]>cutoff, depth_tuples))

         for dt in hp:
             boundaries[dt[1]] = 1
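
For context (not part of the commit): in Python 3, filter() returns a lazy iterator rather than the list it returned in Python 2, so the result can be consumed only once and supports neither len() nor indexing; wrapping it in list() restores the Python 2 semantics the surrounding code expects. A minimal sketch with made-up data:

# Sketch with made-up data: the Python 2 / Python 3 difference behind the fix.
depth_tuples = [(0.9, 0), (0.2, 1), (0.7, 2), (0.8, 3)]
cutoff = 0.5

hp_iter = filter(lambda x: x[0] > cutoff, depth_tuples)  # Python 3: lazy iterator
print(list(hp_iter))  # [(0.9, 0), (0.7, 2), (0.8, 3)]
print(list(hp_iter))  # [] -- exhausted after a single pass

hp_list = list(filter(lambda x: x[0] > cutoff, depth_tuples))
print(hp_list)        # same elements, but reusable
print(len(hp_list))   # 3 -- len() and repeated iteration work, as in Python 2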

0 commit comments
