1 file changed: +39 -2 lines changed.
Original file line number | Diff line number | Diff line change
11 11
# Sample HTML document parsed by the examples below.
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>
<div id="body"><div id="python">this is python topic</div></div>
<p class="story">...</p>
</body>
</html>
"""

# Name the parser explicitly so the resulting tree does not depend on which
# parser libraries happen to be installed on the machine running the script.
soup = BeautifulSoup(html_doc, "html.parser")


# NOTE: every print below uses the single-argument call form, which produces
# the same output on Python 2 and Python 3.

# Get an HTML element by its tag name.
title = soup.title
print(title)

# Chained tag access: the first <b> inside the first <p> of <body>.
b = soup.body.p.b
print(b)

# Get all matching elements: every <a> tag in the page.
list_a = soup.find_all('a')
print(type(list_a))
for item in list_a:
    print(item)

# The .contents attribute exposes a tag's child nodes as a list.
head_tag = soup.head
print(head_tag)

print(head_tag.contents[0])

# Iterate over a tag's direct child nodes with .children.
# NOTE(review): text nodes between tags are expected to print None here,
# since only tags carry a name -- confirm against the bs4 version in use.
for child in soup.body.children:
    print(child.name)

# Iterate over all nested nodes (children, grandchildren, ...) with .descendants.
for child in soup.body.descendants:
    print(child.name)

# Output every text string in the document with .strings.
for string in soup.strings:
    print(repr(string))

# Output only the non-blank text strings, whitespace-stripped, with
# .stripped_strings.
for string in soup.stripped_strings:
    print(repr(string))
You can’t perform that action at this time.
0 commit comments