[dywang@dywmac zzz]$ cat crawler9.py
#!/usr/bin/env python
# coding: utf-8
import bs4
# Parse node2.html and list every <li> element it contains, printing the
# text of each one twice: once via the `.text` attribute and once via the
# `getText()` method, to show that both give the same result.

# Open the file in a `with` block so the handle is closed as soon as the
# document has been handed to the parser, instead of leaking until exit.
with open('node2.html') as htmlfile:
    soup = bs4.BeautifulSoup(htmlfile, 'lxml')

# find_all() collects every matching <li> tag in the document.
tag = soup.find_all('li')

# NOTE: "tyep" is a typo for "type"; the label is kept unchanged so the
# script's output still matches the recorded run.
print("tyep: ", type(tag))
print("tag: ", tag)

# Text of each <li>, read through the `.text` attribute.
print("tag text: ")
for data in tag:
    print(data.text)

# Same text again, this time through the getText() method. Iterating the
# result directly replaces the index-based range(len(tag)) loop.
print("tag gettext: ")
for data in tag:
    print(data.getText())
[dywang@dywmac zzz]$ ./crawler9.py
('tyep: ', <type 'list'>)
('tag: ', [<li><a href="node3.html" name="tex2html184">簡介</a>
</li>, <li><a href="node4.html" name="tex2html185">安裝與執行</a>
</li>, <li><a href="node5.html" name="tex2html186">內縮語法</a>
</li>, <li><a href="node6.html" name="tex2html187">括號、引號、換行、註解</a>
</li>])
tag text:
簡介
安裝與執行
內縮語法
括號、引號、換行、註解
tag gettext:
簡介
安裝與執行
內縮語法
括號、引號、換行、註解