from __future__ import unicode_literals
import sys
from scrapy.pipelines.images import ImagesPipeline
from scrapy.exceptions import DropItem
from scrapy.http import Request
import os
reload(sys)
sys.setdefaultencoding('utf-8')
class TetePipeline(ImagesPipeline):
def get_media_requests(self, item, info):
for image_url in item['image_urls']:
yield Request(image_url)
def item_completed(self, results, item, info):
item['image'] = []
image_paths = [x['path'] for ok, x in results if ok]
if not image_paths:
raise DropItem('Items contains no images')
item['image_paths'] = image_paths
for i in item['image_paths']:
item['image'].append(item['image_titles']+i[-8:])
item['image_paths'] = item['image']
return item
想改变scrapy下载的图片的名称,使用自定义名字,打算使用file_path,但是file_path改动太大。
这个可以下载图片,但是图片名称是是图像url的 SHA1 hash,为什么呢?
你看我的,最下面的filename就是更改了图片的名字以及存放的目录