def process_item(self, item, spider):
    """Store the item's (url, name) pair in ``self.post`` unless it already exists.

    Args:
        item: Mapping providing the ``'file_url'`` and ``'name'`` keys.
        spider: The spider that produced the item (unused here).

    Returns:
        The same ``item``, whether or not a new record was inserted.

    Raises:
        KeyError: If ``item`` lacks ``'file_url'`` or ``'name'``.
    """
    # presumably self.post is a pymongo collection; verify against __init__
    record = {"url": item['file_url'], "name": item['name']}

    # An exact-match lookup on both fields is the compound-key duplicate test.
    # find_one returns None when nothing matches, whereas an aggregate()
    # result object is truthy whether or not a matching document exists, so
    # testing it directly can never reach the insert branch.
    if self.post.find_one(record) is None:
        # insert_one replaces the deprecated Collection.insert.
        self.post.insert_one(record)

    # NOTE(review): if this class derives from Scrapy's FilesPipeline /
    # MediaPipeline, overriding process_item may bypass the base-class logic
    # that schedules the download; confirm, and consider doing the duplicate
    # check in get_media_requests instead.
    return item
def file_path(self, request, response=None, info=None, item=None):
    """Return the storage path for a download, taken from the request's meta.

    Args:
        request: Object whose ``meta`` mapping may hold a ``'filename'`` entry.
        response: Unused; accepted for interface compatibility.
        info: Unused; accepted for interface compatibility.
        item: Unused; accepted so callers that pass ``item=`` (as the
            documented Scrapy media-pipeline signature does) work unchanged.

    Returns:
        ``request.meta['filename']`` when present, otherwise ``''``.
    """
    # NOTE(review): a missing 'filename' yields an empty path; confirm the
    # storage backend handles that sensibly.
    return request.meta.get('filename', '')
def get_media_requests(self, item, info):
    """Yield a single download request for the item's file.

    The request targets ``item['file_url']`` and carries ``item['name']`` in
    its meta under ``'filename'``, which is the key ``file_path`` reads.

    Args:
        item: Mapping providing the ``'file_url'`` and ``'name'`` keys.
        info: Unused here.

    Yields:
        One ``Request`` for the item's file URL.
    """
    target = item['file_url']
    yield Request(url=target, meta={'filename': item['name']})
这里我继承了一个 pipeline,想先用组合键(url + name)判断是否重复:不重复才下载,重复则不下载。这个该如何写?