Commit 96ceee7

Cast lambda object to list texttiling for python3
Added doctest which catches the current regression across python2 and python3
1 parent 6cf3382 commit 96ceee7

1 file changed: +10 -1 lines changed


nltk/tokenize/texttiling.py

Lines changed: 10 additions & 1 deletion
@@ -52,6 +52,15 @@ class TextTilingTokenizer(TokenizerI):
     :param cutoff_policy: The policy used to determine the number of boundaries:
       `HC` (default) or `LC`
     :type cutoff_policy: constant
+
+    >>> from nltk.corpus import brown
+    >>> tt = TextTilingTokenizer(demo_mode=True)
+    >>> text = brown.raw()[:10000]
+    >>> s, ss, d, b = tt.tokenize(text)
+    >>> b
+    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
+    0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
+    0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
     """

     def __init__(self,
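
Not part of the commit, but a quick way to exercise the new doctest locally is a plain doctest run over the module. A minimal sketch, assuming NLTK is installed and the required corpora (brown, plus stopwords for the tokenizer's defaults) have been downloaded; NORMALIZE_WHITESPACE is needed because the expected boundary list is wrapped across several lines in the docstring:

# Sketch: run the texttiling module's doctests, including the one added above.
# Assumes nltk is installed and the needed corpora are available, e.g.
#   nltk.download('brown'); nltk.download('stopwords')
import doctest
import nltk.tokenize.texttiling as texttiling

# NORMALIZE_WHITESPACE lets the wrapped expected list in the docstring
# match the single-line repr that `>>> b` actually prints.
doctest.testmod(texttiling, optionflags=doctest.NORMALIZE_WHITESPACE, verbose=True)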
@@ -284,7 +293,7 @@ def _identify_boundaries(self, depth_scores):

         depth_tuples = sorted(zip(depth_scores, range(len(depth_scores))))
         depth_tuples.reverse()
-        hp = filter(lambda x:x[0]>cutoff, depth_tuples)
+        hp = list(filter(lambda x:x[0]>cutoff, depth_tuples))

         for dt in hp:
             boundaries[dt[1]] = 1
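
For context (not part of the commit): in Python 3, filter() returns a lazy iterator rather than the list it returned in Python 2, so the result can be consumed only once and supports neither len() nor indexing; wrapping it in list() restores the Python 2 semantics the surrounding code expects. A minimal sketch with made-up data:

# Sketch with made-up data: the Python 2 / Python 3 difference behind the fix.
depth_tuples = [(0.9, 0), (0.2, 1), (0.7, 2), (0.8, 3)]
cutoff = 0.5

hp_iter = filter(lambda x: x[0] > cutoff, depth_tuples)  # Python 3: lazy iterator
print(list(hp_iter))  # [(0.9, 0), (0.7, 2), (0.8, 3)]
print(list(hp_iter))  # [] -- exhausted after a single pass

hp_list = list(filter(lambda x: x[0] > cutoff, depth_tuples))
print(hp_list)        # same elements, but reusable
print(len(hp_list))   # 3 -- len() and repeated iteration work, as in Python 2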

0 commit comments
