Scrapy 框架报错:为什么提示没有 headers 属性?

from Espider.pipelines.mongodbpipeline import mongodb_pipeline
from scrapy.exceptions import DropItem
import requests,os
import hashlib




class searchwebsitepipeline(mongodb_pipeline):
    # NOTE(review): this method is named ``__int__``, not ``__init__``, so it is
    # not run when the pipeline object is constructed and ``self.headers`` is
    # never assigned -- consistent with the AttributeError in the traceback
    # shown further down this page.
    def __int__(self):
        # Browser-like User-Agent passed to requests.get() in get_max_size_url.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"}
    def get_max_size_url(self, url_list):
        """Download every URL and return the one whose saved file is largest.

        Each response body is written to ``./image/<sha1 hex digest of body>``
        and its on-disk size is compared with the largest seen so far.

        Args:
            url_list: iterable of URLs to fetch with ``requests.get``.

        Returns:
            The first URL with the largest downloaded size, or ``None`` when
            ``url_list`` is empty.

        Reads ``self.headers`` for the request headers, so that attribute must
        have been set before this method is called.
        """
        image_dir = "./image"
        # Make sure the target directory exists before writing into it.
        os.makedirs(image_dir, exist_ok=True)

        largest_url = None
        largest_size = -1
        for url in url_list:
            res = requests.get(url, headers=self.headers)
            body = res.content
            # hexdigest() gives a str usable as a file name; the hash object
            # itself cannot be joined into a path.
            save_path = os.path.join(image_dir, hashlib.sha1(body).hexdigest())
            with open(save_path, "wb") as code:
                # Write the downloaded bytes (not the path) to disk.
                code.write(body)
            size = os.path.getsize(save_path)
            # Strict '>' keeps the earliest URL when several share the max size.
            if size > largest_size:
                largest_size = size
                largest_url = url
        # Decide only after every URL has been downloaded and measured.
        return largest_url

    def process_item(self, item, spider):
        """Handle one scraped item (excerpt: only the debug print is shown).

        NOTE(review): the traceback on this page shows the real method going
        on to call ``self.get_max_size_url(item['iconUrl'])``; that part is
        not included in this snippet.
        """
        print('进入mongodb 你来了吗')

我在 pipeline 里面定义了 headers,为什么报错说没有这个属性?有人知道吗?

2018-11-26 03:42:36 [scrapy.core.scraper] ERROR: Error processing {'androidUpProductAbstract': '',
 'androidUpProductDetailType': 2,
 'androidUpProductLink': '',
 'androidUpProductName': '',
 'businessName': '佛山饭堂承包公司',
 'iconUrl': ['1.ico', '1.ico'],
 'iosUpProductAbstract': '',
 'iosUpProductDetailType': 1,
 'iosUpProductLink': '',
 'iosUpProductName': ''}
Traceback (most recent call last):
  File "/home/shenjianlin/.local/lib/python3.4/site-packages/twisted/internet/defer.py", line 653, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "/home/shenjianlin/my_project/Espider/Espider/pipelines/searchwebsitepipeline.py", line 37, in process_item
    iconUrl=self.get_max_size_url(item['iconUrl'])
  File "/home/shenjianlin/my_project/Espider/Espider/pipelines/searchwebsitepipeline.py", line 17, in get_max_size_url
    res = requests.get(url, headers=self.headers)
AttributeError: 'searchwebsitepipeline' object has no attribute 'headers'

打印链接

阅读 1.9k
1 个回答

你的 `__int__` 方法名写错了,应该是 `__init__`。Python 在创建对象时只会自动调用 `__init__`,不会调用 `__int__`,所以 `self.headers` 从未被赋值,访问时就会抛出 AttributeError。

如果还不行, 可以尝试把headers的初始化放在open_spider()方法内:

class searchwebsitepipeline(mongodb_pipeline):
    """Skeleton pipeline that sets its request headers in ``open_spider``."""

    def open_spider(self, spider):
        """Assign ``self.headers`` here instead of relying on ``__init__``."""
        user_agent = 'myUserAgent'
        self.headers = {'User-Agent': user_agent}

    def get_max_size_url(self, url_list):
        """Placeholder; the body is omitted in this example."""
        return None

    def process_item(self, item, spider):
        """Placeholder; the body is omitted in this example."""
        return None

参考:
Item Pipeline Doc

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题