import json
from baidu.items import BaiduItem
import scrapy
class PicSpider(scrapy.Spider):
name = "pic"
def start_requests(self):
url = "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=10526330116477428204&ipn=rj&ct=201326592&is=&fp=result&fr=&word=%E6%B8%85%E7%BA%AF%E7%BE%8E%E5%A5%B3&cg=girl&queryWord=%E6%B8%85%E7%BA%AF%E7%BE%8E%E5%A5%B3&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=©right=&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&expermode=&nojc=&isAsync=&pn=90&rn=30&gsm=5a&1655746238117="
yield scrapy.Request(url=url, callback=self.parse)
# def parse(self, response, address, pid):
def parse(self, response):
item = BaiduItem()
res = json.loads(response.text)
imgBox = res['data']
for pic in imgBox:
if(pic['middleURL']):
item['image_url'] = pic['middleURL']
yield item
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'baidu.pipelines.BaiduPipeline': 300,
'scrapy.pipelines.images.ImagesPipeline': 1,
}
# 配置pipeline,设定需要进行处理的图片路径
IMAGES_URLS_FIELD = "image_url"
project_dir = os.path.abspath(os.path.dirname(__file__))
IMAGES_STORE = os.path.join(project_dir, 'images')
不知道是什么问题。
应该就是单纯的 item['image_url'] = pic['middleURL']
这里出错,但还是搞不定。
按照大家的别人的改成[pic['middleURL']]
也还是报错。
对了我用的的2.7版本的 scrapy
if(pic['middleURL']):
是这句错了
你这个判断没有意义,你应该先判断pic有没有这个key
python写的少,我记得字典判断key是否存在,应该是这样
pic.has_key('middleURL')
另外,如果确实为空,那就查scrapy请求爬取的数据对不对了