scrapy怎么总是只采集第一页,代码怎么改?

怎么总是采集完第一页的数据,就停止了,我这代码哪里有问题吗?
spider代码

import scrapy
import json
from douyu.items import DouyuItem


class MeinvSpider(scrapy.Spider):
    name = 'meinv'
    allowed_domains = ['capi.douyucdn.cn']
    
    offset = 0
    url = "http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset="
    start_urls = [url + str(offset)]

    def parse(self, response):
        res = json.loads(response.text)
        for each in res['data']:
            item = DouyuItem()
            item["nickname"] = each["nickname"]
            item["imagelink"] = each["vertical_src"] 
            yield item
            
        self.offset += 20
        yield scrapy.Request(self.url+str(self.offset),callable=self.parse)

图片管道代码

#获取设置
import scrapy
from scrapy.utils.project import get_project_settings
from scrapy.pipelines.images import ImagesPipeline
import os

class DouyuPipeline(ImagesPipeline):

    #获取settings文件里设置的变量值
    IMAGES_STORE = get_project_settings().get("IMAGES_STORE")

    #获取图片链接,并发送请求
    def get_media_requests(self,item,info):
        image_url = item["imagelink"]
        yield scrapy.Request(image_url,meta={
            "item":item
        })
    
    #处理图片
    def item_completed(self,results,item,info):
        # ok判断是否下载成功
        image_paths = [x["path"] for ok, x in results if ok]
        
        if not image_paths:
            raise DropItem("Item contains no images")
        
        #os.rename(self.IMAGES_STORE + image_path[0], self.IMAGES_STORE + item["nickname"] + ",jpg")
        item["imagePath"] = image_paths[0]

        return item
    
阅读 2.5k
1 个回答

Request的参数名写错了 callback

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题