class jingzhun(RedisCrawlSpider):
    """Spider for rong.36kr.com that sends a fixed header set and session cookie.

    The spider declares ``redis_key`` for the Redis-backed base class and
    also seeds the crawl itself from ``start_requests`` with the first page
    of the company listing API.

    NOTE(review): only the request built in ``start_requests`` carries
    ``self.headers`` and the cookies. Requests the base class builds from
    URLs popped off ``redis_key`` presumably do not; if they need them too,
    the request-building hook of the base class (``make_requests_from_url``
    / ``make_request_from_data``, depending on the scrapy-redis version)
    would have to be overridden -- confirm against the installed version.
    """

    name = "jingzhun"
    allowed_domains = []
    # No static start_urls: seeds come from ``redis_key`` / ``start_requests``.
    custom_settings = {
        # Presumably read by a project-level delay middleware -- TODO confirm.
        "RANDOM_DELAY": 20,
    }
    redis_key = 'jingzhun:starturls'

    def __init__(self, *args, **kwargs):
        """Set up allowed domains, request headers, cookie and the JS helper.

        Args:
            *args: Forwarded unchanged to the base spider.
            **kwargs: Forwarded to the base spider after ``domain`` is removed.
                ``domain`` is an optional comma-separated string of allowed
                domains; empty entries are dropped.
        """
        domain = kwargs.pop('domain', '')
        # On Python 3 ``filter`` returns a one-shot iterator; materialize it
        # so the domains survive being iterated more than once.
        self.allowed_domains = list(filter(None, domain.split(',')))
        super(jingzhun, self).__init__(*args, **kwargs)

        # SECURITY NOTE(review): this is a live session cookie (user id and
        # token) committed in source. It should be loaded from settings or
        # the environment and rotated; left unchanged here to keep behavior.
        self.cookie_str = "acw_tc=b65cfd2515395760831792797e7a30fed7278a95d7c68d0dcad0b9cbc4ac1b; kwlo_iv=1h; kr_stat_uuid=TRRfp25694452; Hm_lvt_e8ec47088ed7458ec32cde3617b23ee3=1541062621,1541150329,1541661241; Hm_lpvt_e8ec47088ed7458ec32cde3617b23ee3=1541667148; download_animation=1; _kr_p_se=9867c144-9614-4298-96f7-0e46ed5efefe; krid_user_id=2014445492; krid_user_version=2; kr_plus_id=2014445492; kr_plus_token=8dnyAhS2t87wW1PU1p91L_jUAHPFmepeJJ75____; kr_plus_utype=0; device-uid=5fa2cef0-e334-11e8-978f-67115035d613"

        # Base headers sent with the seed request.
        self.headers = {
            "Referer": "https://rong.36kr.com/list/detail&?sortField=HOT_SCORE",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
            "Host": "rong.36kr.com",
        }
        # Same headers plus the raw cookie string, derived from ``headers``
        # so the two dicts cannot drift apart.
        self.co_headers = dict(self.headers, cookie=self.cookie_str)

        # Read the JS helper source once and close the file deterministically.
        with open('./js/jingzhun.js', 'r') as js_file:
            self.js_read = js_file.read()

    def start_requests(self):
        """Yield the seed request for page 1 of the company listing API.

        The request carries ``self.headers`` and the cookies produced by
        ``get_cookies(self.cookie_str)`` (helper defined elsewhere;
        presumably parses the cookie string into a mapping -- verify).
        Responses are routed to ``self.get_all_info``, which is not defined
        in the visible part of this class.
        """
        yield scrapy.Request(
            url="https://rong.36kr.com/n/api/column/0/company?sortField=HOT_SCORE&p=1",
            callback=self.get_all_info,
            headers=self.headers,
            dont_filter=True,
            cookies=get_cookies(self.cookie_str),
        )
这里的 redis_key 是从 Redis 队列里读取的,但现在这个网址需要带上 cookie 和 header 才能请求到数据。请问这种情况下,请求应该怎么写?求指教,谢谢!