1、爬取网页的通用代码框架
import requests
def getHtmlText(url, timeout=10):
    """Fetch *url* and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The page text, decoded with the encoding that requests detects
        from the body (``apparent_encoding``). If the request fails
        (connection error, timeout, invalid URL, or a 4xx/5xx status),
        the sentinel string "产生异常" is returned instead.
    """
    headers = {
        # Present a desktop browser user agent instead of the default
        # python-requests one.
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/79.0.3945.130 Safari/537.36'
        ),
    }
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        # Raises requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        # Prefer the encoding guessed from the body over the header value.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel;
        # KeyboardInterrupt and programming errors still propagate.
        return "产生异常"
if __name__ == "__main__":
    # Demo: download one news list page and dump its text to stdout.
    url = "http://news.fznews.com.cn/shehui/list.shtml"
    print(getHtmlText(url))
2、爬取照片代码
import requests
def getPicture(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the image (or any binary resource) to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as ``bytes``. If the request fails (connection
        error, timeout, invalid URL, or a 4xx/5xx status), the sentinel
        string "产生异常" is returned instead, so callers should check for
        ``bytes`` before writing the result to a binary file.
    """
    headers = {
        # Present a desktop browser user agent instead of the default
        # python-requests one.
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/79.0.3945.130 Safari/537.36'
        ),
    }
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        # Raises requests.HTTPError for 4xx/5xx responses.
        r.raise_for_status()
        return r.content
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel;
        # KeyboardInterrupt and programming errors still propagate.
        return "产生异常"
if __name__ == "__main__":
    # Demo: download one picture and save it to the desktop.
    picurl = "http://img0.dili360.com/pic/2019/10/23/5db027e9441a73i93221149.jpg"
    path = "C://Users//fuxingyu//Desktop//abc.jpg"
    Pic = getPicture(picurl)
    # getPicture returns a str sentinel on failure; writing that to a file
    # opened in 'wb' mode would raise TypeError and leave an empty file.
    if isinstance(Pic, bytes):
        # The with-statement closes the file; no explicit close() needed.
        with open(path, 'wb') as f:
            f.write(Pic)
    else:
        print("爬取失败")
或者
import requests
import os
# Download a single file into `root`, named after the last URL path segment,
# unless a file with that name is already there.
url = "https://pic.rmb.bdstatic.com/1cf349c922d2e0faa054de841535a0788853.gif"
root = "C://Users//fuxingyu//Desktop//"
path = root + url.split('/')[-1]
try:
    # os.path.split() returns a non-empty tuple, which is always truthy, so
    # the original "if not os.path.split(root)" never created the folder.
    # makedirs with exist_ok=True creates it (and any parents) only if needed.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        r = requests.get(url, timeout=10)
        # Raise on 4xx/5xx so an error page is not saved as the image.
        r.raise_for_status()
        # The with-statement closes the file; no explicit close() needed.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已经存在")
except (requests.RequestException, OSError):
    # Network failures and filesystem errors are reported; anything else
    # (e.g. KeyboardInterrupt) is allowed to propagate.
    print("爬取失败")
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用。你还可以使用 @ 来通知其他用户。