1 parent 6cf3382 commit 96ceee7
nltk/tokenize/texttiling.py
@@ -52,6 +52,15 @@ class TextTilingTokenizer(TokenizerI):
     :param cutoff_policy: The policy used to determine the number of boundaries:
       `HC` (default) or `LC`
     :type cutoff_policy: constant
+
+    >>> from nltk.corpus import brown
+    >>> tt = TextTilingTokenizer(demo_mode=True)
+    >>> text = brown.raw()[:10000]
+    >>> s, ss, d, b = tt.tokenize(text)
+    >>> b
+    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
+     0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
+     0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]
     """
 
     def __init__(self,
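The added doctest runs the tokenizer in demo mode, where tokenize() returns the gap scores, smoothed scores, depth scores, and boundary markers (s, ss, d, b) instead of segmented text. For comparison, a minimal sketch of the default, non-demo usage; this is an illustration based on the class's documented behaviour, not part of this commit:

from nltk.corpus import brown
from nltk.tokenize.texttiling import TextTilingTokenizer

# Default mode (demo_mode=False): tokenize() returns the topical segments
# ("tiles") of the input text rather than the score/boundary arrays.
# Assumes the brown and stopwords corpora have been downloaded via nltk.download().
tt = TextTilingTokenizer()
text = brown.raw()[:10000]
segments = tt.tokenize(text)
print(len(segments))        # number of segments found
print(segments[0][:200])    # start of the first segment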
@@ -284,7 +293,7 @@ def _identify_boundaries(self, depth_scores):
 
         depth_tuples = sorted(zip(depth_scores, range(len(depth_scores))))
         depth_tuples.reverse()
-        hp = filter(lambda x:x[0]>cutoff, depth_tuples)
+        hp = list(filter(lambda x:x[0]>cutoff, depth_tuples))
 
         for dt in hp:
             boundaries[dt[1]] = 1
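Background on the one-line change above: in Python 3, filter() returns a lazy, single-pass iterator rather than a list, so wrapping it in list() preserves the Python 2 list semantics that the rest of _identify_boundaries relies on. A minimal standalone sketch of the difference (illustrative values only):

scores = [0.1, 0.7, 0.3, 0.9]

# Python 3: filter() yields a one-shot iterator.
hp = filter(lambda x: x > 0.5, scores)
print(list(hp))   # [0.7, 0.9]
print(list(hp))   # []  (already exhausted on the second pass)

# Materialising the result keeps it reusable, as the patched code does.
hp = list(filter(lambda x: x > 0.5, scores))
print(hp)         # [0.7, 0.9]
print(hp)         # [0.7, 0.9]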