怎么总是采集完第一页的数据,就停止了,我这代码哪里有问题吗?
spider代码
import scrapy
import json
from douyu.items import DouyuItem
class MeinvSpider(scrapy.Spider):
    """Pages through Douyu's vertical-room API, 20 rooms per request,
    yielding one DouyuItem (nickname + vertical cover image URL) per room.
    """
    name = 'meinv'
    allowed_domains = ['capi.douyucdn.cn']
    offset = 0
    url = "http://capi.douyucdn.cn/api/v1/getVerticalRoom?limit=20&offset="
    start_urls = [url + str(offset)]

    def parse(self, response):
        """Parse one API page: yield an item per room, then request the next page.

        Stops paginating once the API returns no more rooms.
        """
        res = json.loads(response.text)
        data = res.get('data')
        if not data:
            # empty page -> no more rooms, stop paginating
            return
        for each in data:
            item = DouyuItem()
            item["nickname"] = each["nickname"]
            item["imagelink"] = each["vertical_src"]
            yield item
        self.offset += 20
        # BUG FIX: the keyword argument is `callback`, not `callable`.
        # With the typo, scrapy.Request(...) raised TypeError right after the
        # first page's items were yielded, which is why the crawl always
        # stopped after page 1.
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
图片管道代码
#获取设置
import os

import scrapy
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline
from scrapy.utils.project import get_project_settings
class DouyuPipeline(ImagesPipeline):
    """Image pipeline: downloads each item's cover image and records where
    it was stored; items whose download failed are dropped.
    """

    # Image storage directory configured via IMAGES_STORE in settings.py
    IMAGES_STORE = get_project_settings().get("IMAGES_STORE")

    def get_media_requests(self, item, info):
        """Issue a download request for the item's image link.

        The item travels along in request.meta so item_completed (and any
        overridden file_path) can access it.
        """
        image_url = item["imagelink"]
        yield scrapy.Request(image_url, meta={"item": item})

    def item_completed(self, results, item, info):
        """Attach the stored image path to the item.

        `results` is a list of (ok, info_or_failure) pairs; only successful
        downloads contribute a path.

        Raises:
            DropItem: when no image was downloaded for this item.
                BUG FIX: DropItem was used here without being imported
                (`from scrapy.exceptions import DropItem`), so any failed
                download would have crashed with NameError instead of
                cleanly dropping the item.
        """
        image_paths = [x["path"] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        # (Removed dead commented-out os.rename(...) line: it referenced an
        # undefined `image_path` and built ",jpg" instead of ".jpg". To name
        # files by nickname, override file_path() instead of renaming.)
        item["imagePath"] = image_paths[0]
        return item
问题出在 scrapy.Request 的参数名写错了:回调参数应该是 callback,你写成了 callable。这个拼写错误会让 Request 构造时抛出 TypeError,于是第一页的数据 yield 完之后,翻页请求根本没有发出去,爬虫就停止了。把 callable=self.parse 改成 callback=self.parse 即可。