.. Copyright (C) 2001-2012 NLTK Project
.. For license information, see LICENSE.TXT

=================
Utility functions
=================

>>> from nltk.util import *
>>> from nltk.tree import Tree

>>> print_string("This is a long string, therefore it should break", 25)
This is a long string,
therefore it should
break

>>> re_show("[a-z]+", "sdf123")
{sdf}123

>>> tree = Tree(5,
...             [Tree(4, [Tree(2, [1, 3])]),
...              Tree(8, [Tree(6, [7]), 9])])
>>> for x in breadth_first(tree):
...     if isinstance(x, int): print x
...     else: print x.node
5
4
8
2
6
9
1
3
7
>>> for x in breadth_first(tree, maxdepth=2):
...     if isinstance(x, int): print x
...     else: print x.node
5
4
8
2
6
9

>>> invert_dict({1: 2})
defaultdict(<type 'list'>, {2: 1})
>>> invert_dict({1: [3, 4, 5]})
defaultdict(<type 'list'>, {3: [1], 4: [1], 5: [1]})

Testing HTML cleaning
---------------------

>>> html = """
...
...
...
...
...
...
... """
>>> [link.strip() for link in re.split("\n+", clean_html(html))]
['Skip Links', 'AOL', 'My AOL', 'Mail', '', '', 'Get The All-Amer... Ringtones']

>>> clean_html("<h1>Heading</h1><p>Test</p>")
'Heading Test'

>>> clean_html(" aaa<p>bbb ")
'aaa bbb'