python爬虫

叫我瞄大人

1、爬取网页的通用代码框架

import requests  
def getHtmlText(url, timeout=10):
    """Download ``url`` and return the response body as text.

    A browser-like ``User-Agent`` header is sent with the request. The body
    is decoded with the encoding detected from its content
    (``apparent_encoding``) instead of the one declared in the headers.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded page text, or the sentinel string "产生异常" if the
        request fails or the server answers with an error status.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel;
        # KeyboardInterrupt and programming errors still propagate.
        return "产生异常"
  
if __name__ == "__main__":
    # Demo: fetch a news list page and print its HTML
    # (or the failure sentinel returned by getHtmlText).
    url = "http://news.fznews.com.cn/shehui/list.shtml"
    HtmlText = getHtmlText(url)
    print(HtmlText)

image.png

image.png
image.png

2、爬取照片代码

import requests  
def getPicture(url, timeout=10):
    """Download ``url`` and return the raw response body.

    A browser-like ``User-Agent`` header is sent with the request.

    Args:
        url: Address of the image (or any binary resource) to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The response body as ``bytes``, or the sentinel *string* "产生异常"
        if the request fails or the server answers with an error status.
        Callers must check the type before writing the result to a file.
    """
    headers = {'user-agent':
                   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        return r.content
    except requests.RequestException:
        # Only request-related failures are turned into the sentinel;
        # KeyboardInterrupt and programming errors still propagate.
        return "产生异常"
  
if __name__ == "__main__":
    # Demo: download one picture and save it to the desktop.
    picurl = "http://img0.dili360.com/pic/2019/10/23/5db027e9441a73i93221149.jpg"
    path = "C://Users//fuxingyu//Desktop//abc.jpg"
    Pic = getPicture(picurl)
    # getPicture returns a str sentinel on failure; writing that to a file
    # opened in binary mode would raise TypeError, so only save real bytes.
    if isinstance(Pic, bytes):
        with open(path, 'wb') as f:  # the with-block closes the file itself
            f.write(Pic)
    else:
        print(Pic)

或者(根据 URL 自动生成文件名,并在保存前检查文件是否已存在):

import requests  
import os  
# Download a single file into ``root`` unless a file of that name is
# already there.
url = "https://pic.rmb.bdstatic.com/1cf349c922d2e0faa054de841535a0788853.gif"
root = "C://Users//fuxingyu//Desktop//"
# The local file is named after the last path segment of the URL.
path = root + url.split('/')[-1]
try:
    # Make sure the target directory exists. The previous check used
    # os.path.split(), which returns a non-empty tuple and is always truthy,
    # so the directory was never created.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        r = requests.get(url, timeout=10)
        # Do not save an HTTP error page as if it were the requested file.
        r.raise_for_status()
        with open(path, 'wb') as f:  # the with-block closes the file itself
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已经存在")
except (requests.RequestException, OSError):
    # Network failures and filesystem errors are reported; anything else
    # (e.g. KeyboardInterrupt) is allowed to propagate.
    print("爬取失败")
阅读 1.1k

个人编程学习
python,matlab,VB

喜欢追星的非科班的编程爱好者

461 声望
76 粉丝
0 条评论

喜欢追星的非科班的编程爱好者

461 声望
76 粉丝
文章目录
宣传栏