这个爬虫不停报错,我一共写了两个爬虫,大家帮忙看一下

wisdow
  • 3
新手上路,请多包涵
from bs4 import BeautifulSoup
import requests
import os
from multiprocessing import Pool
from urllib.request import urlretrieve

total_num = 3
def url_open(url):
    web_data = requests.get(url)
    html = BeautifulSoup(web_data.text, 'lxml')
    return html

def get_all_cover_url(url):#获得所有封面url
    for num in range(2,total_num+1):#每个页面的url
        total_url = url + 'page/{}'.format(num)
        html = url_open(total_url)
        url_list = [link.attrs['href'] for link in html.select('#pins > li > a')]
        #print(url_list)
        return url_list

#get_all_cover_url(start_url)

def from_cover_get_url_list(url):
    html = url_open(url)
    img_num = int([i.get_text() for i in html.select('div.pagenavi > a > span')][-2])#['«上一组', '2', '3', '4', '31', '下一页»']
    response = html.select('body > div.main > div.content > div.main-image > p > a > img')
    s_ = response[0].get('src')#第一个图片地址
    img_url_list = [s_[:-6] + '0' + str(num) + s_[-4:]
                    if num<10
                    else s_[:-6] + str(num) + s_[-4:]
                    for num in range(1,img_num+1)]
    return img_url_list
    #print(img_url_list)
#get_img_url_list(url='http://www.mzitu.com/84934')

def download(start_url='http://www.mzitu.com/',folder='妹子图'):
    try:
        os.mkdir(folder)
        os.chdir(folder)
    except:
        os.chdir(folder)
        all_cover_url = get_all_cover_url(start_url)#获得所有系列的链接
        for cover_url in all_cover_url:
            img_url_list = from_cover_get_url_list(cover_url)
            for img_url in img_url_list:
                #'http://i.meizitu.net/2017/02/18a01.jpg'
                filename = img_url[21:]
                urlretrieve(img_url,filename)
                print(img_url)

#FileNotFoundError: [Errno 2] No such file or directory: '2017/02/18a01.jpg'报错

if __name__ == '__main__':
    #pool = Pool()
    download()
评论
阅读 1.8k
1 个回答

好吧,很抱歉,我给你的评论是有问题的。我尝试的跑了你的代码。并且修改了一下当作测试的。如下代码是可以通过的。

filename = 'image_test.jpg' #img_url[21:]

图片描述

宣传栏