自学Python,用scrapy模拟登录时遇到问题:拉勾网怎么也登录不成功,试验了几十遍,scrapy一直提示“scrapy.exceptions.NotSupported: Unsupported URL scheme 'javascript': no handler available for that scheme”。
请高手帮忙看看代码,一解心头窒闷。另外,我用selenium模拟登录是可以的,只是想用scrapy原生的方式登录。代码如下:
import scrapy
import re
class DdooSpider(scrapy.Spider):
    """Spider that attempts a form login on passport.lagou.com.

    Flow: fetch the login page, extract the two anti-forgery values embedded
    in its inline JavaScript, then POST the credentials with those values
    attached as request headers. The login response is printed by ``ceshi``.
    """

    name = 'ddoo'
    # Registered domain instead of 'www.lagou.com': every request this spider
    # makes goes to the passport.lagou.com subdomain, which must be allowed.
    allowed_domains = ['lagou.com']
    start_urls = ['https://www.lagou.com']
    # Explicit POST target for the login request. It must not be derived from
    # the page's <form> element (see ``after``).
    # NOTE(review): endpoint assumed from the site's AJAX login flow -- confirm
    # it against the login request shown in the browser's network panel.
    login_url = 'https://passport.lagou.com/login/login.json'
    # Base headers shared by every request; treated as read-only.
    headers = {
        'Host': 'passport.lagou.com',
        'Origin': 'https://passport.lagou.com',
        'Referer': 'https://passport.lagou.com/login/login.html',
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.111 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
        "Content-Type": 'application/x-www-form-urlencoded;charset=UTF-8',
    }

    def start_requests(self):
        """Return the initial request for the login page.

        The ``cookiejar`` meta key names the cookie session so the follow-up
        login request can reuse the cookies set by this page.
        """
        return [
            scrapy.Request(
                'https://passport.lagou.com/login/login.html',
                headers=self.headers,
                callback=self.after,
                meta={'cookiejar': 1},
            )
        ]

    def parse(self, response):
        """Unused default callback; every request sets its own callback."""
        pass

    def after(self, response):
        """Extract the anti-forgery values and submit the login form.

        Returns a one-element list holding the login ``FormRequest``, or an
        empty list when the anti-forgery values cannot be found in the page.
        """
        match_obj = re.search(
            r".*window.X_Anti_Forge_Token = '(.*?)'.*window.X_Anti_Forge_Code = '(.*?)'",
            response.text,
            re.S,
        )
        if match_obj is None:
            # Without both values the login cannot be attempted; stop here
            # instead of failing with AttributeError on ``None.group``.
            self.logger.error(
                'Anti-forge token/code not found in %s; login aborted',
                response.url,
            )
            return []
        forge_token, forge_code = match_obj.groups()
        print(forge_code, forge_token)

        # Work on a copy so the class-level ``headers`` dict is never mutated.
        # The anti-forge values travel as headers, not as form fields.
        # NOTE(review): the 'Anit' spelling is kept exactly as in the original
        # code -- presumably it matches the header name the site expects.
        login_headers = dict(self.headers)
        login_headers['X-Anit-Forge-Code'] = forge_code
        login_headers['X-Anit-Forge-Token'] = forge_token

        # NOTE(review): credentials are hard-coded; consider loading them from
        # settings or spider arguments instead.
        form_data = {
            'isValidate': 'true',
            'username': '456223451@qq.com',
            'password': 'ef578e9600a81c87d85133096c2b01e4',
            'request_form_verifyCode': '',
            'submit': '',
        }

        # FormRequest.from_response() would take the URL from the <form>'s
        # ``action`` attribute; the reported "Unsupported URL scheme
        # 'javascript'" error indicates that action is a javascript:
        # pseudo-URL. Build the POST against an explicit endpoint instead
        # (FormRequest defaults to POST when ``formdata`` is given).
        return [
            scrapy.FormRequest(
                url=self.login_url,
                formdata=form_data,
                headers=login_headers,
                meta={"cookiejar": response.meta["cookiejar"]},
                callback=self.ceshi,
                dont_filter=True,
            )
        ]

    def ceshi(self, response):
        """Print the URL and body of the login response for inspection."""
        print(response.url)
        print(response.text)
建议打开浏览器看看拉勾是怎么登录的,或者尝试用selenium登录后取出cookie。
