[root@dywmac ~]# cd /usr/src/
[root@dywmac src]# wget https://www.crummy.com/software/BeautifulSoup/bs4/\
download/4.0/beautifulsoup4-4.1.0.tar.gz
[root@dywmac src]# tar zxvf beautifulsoup4-4.1.0.tar.gz
[root@dywmac src]# cp -a beautifulsoup4-4.1.0/bs4 /usr/lib/python2.6/site-packages/
[dywang@dywmac zzz]$ cat crawler5.py #!/usr/bin/env python # coding: utf-8 import requests, bs4 url = 'http://dywang.csie.cyut.edu.tw/dywang/rhce7/' htmlfile = requests.get(url) soup = bs4.BeautifulSoup(htmlfile.text, 'lxml')
[dywang@dywmac zzz]$ ./crawler5.py
<class 'bs4.BeautifulSoup'>
[dywang@dywmac zzz]$ wget https://dywang.csie.cyut.edu.tw/dywang/pythonProgram/node2.html
[dywang@dywmac zzz]$ cat crawler6.py
#!/usr/bin/env python
# coding: utf-8
# Parse a locally saved HTML file with BeautifulSoup and print the type of
# the parsed object and the page's <title> element.
import bs4

# Open the file in a with-block so the handle is closed as soon as the
# document has been parsed, instead of being left open until exit.
with open('node2.html') as htmlfile:
    soup = bs4.BeautifulSoup(htmlfile, 'lxml')

print("class tyep: ", type(soup))
print("title: ", soup.title)
[dywang@dywmac zzz]$ ./crawler6.py
('class tyep: ', <class 'bs4.BeautifulSoup'>)
('title: ', <title>認識 Python</title>)