使用 requests 通过手机号方式模拟登录知乎,返回码是 200,但不能成功登录。
顺便说一下这个_xsrf是提交表单时的一个动态数据,我已获取
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
def get_headers(headers):
    """Parse a block of raw ``Name:value`` header lines into a dict.

    The input is text with one header per line, e.g. as copied from a
    browser's developer tools. Each line is split on its first colon only,
    so values that themselves contain colons (URLs, cookies) stay intact.

    Blank lines are skipped, and whitespace (including a trailing ``\\r``)
    around each name and value is removed. If a name occurs more than once,
    the last occurrence wins.

    :param headers: text containing newline-separated ``Name:value`` lines.
    :returns: dict mapping header names to header values.
    :raises ValueError: if a non-blank line contains no colon.
    """
    parsed = {}
    for raw_line in headers.split('\n'):
        line = raw_line.strip()
        if not line:
            # Leading/trailing/empty lines carry no header; skipping them
            # avoids the opaque dict() failure on a one-element entry.
            continue
        name, colon, value = line.partition(':')
        if not colon:
            raise ValueError('malformed header line (no colon): %r' % line)
        parsed[name.strip()] = value.strip()
    return parsed
if __name__ == '__main__':
    # Script flow: fetch the sign-in page, read the _xsrf token from its
    # form, then POST that token together with the credentials to the
    # phone-number login endpoint and print the server's reply.
    url = 'https://www.zhihu.com/signin?next=/'

    # Placeholder credentials for the login form.
    # NOTE(review): the key 'accont' looks misspelled. The server's error
    # payload is keyed on "account", but the field name this endpoint
    # actually expects is not shown here -- confirm it against the form the
    # browser submits before changing it.
    dict_data = {'accont': 'phone_num','password':'123456'}

    # Request headers captured from a browser session, one "Name:value" per
    # line. The lines must stay at column 0: any leading whitespace inside
    # the literal would become part of the header names.
    # NOTE(review): the Cookie line is a hard-coded browser session cookie
    # that embeds its own _xsrf value; it goes stale and should not be
    # committed with real session data.
    # NOTE(review): Accept-Encoding advertises "br"; requests only decodes
    # brotli bodies when a brotli package is installed -- confirm, otherwise
    # r.text may not be parseable HTML.
    header = '''Accept:*/*
Accept-Encoding:gzip, deflate, br
Accept-Language:zh-CN,zh;q=0.8
Connection:keep-alive
Content-Type:application/x-www-form-urlencoded; charset=UTF-8
Cookie:d_c0="ACBC3UKxzwuPTshbFfLKRqjjMT0oWXgblAs=|1495696563"; _zap=437b6b27-f1ea-4408-80d7-5c0d01679561; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; q_c1=969379bf218041c8b0c146155605b388|1500691530000|1491210336000; aliyungf_tc=AQAAAMvycmzCfAMAXQ0sdy5m9cunAewC; capsion_ticket="2|1:0|10:1503046792|14:capsion_ticket|44:OTgxMmQ1ZWYxMzk1NDAwNzk2YTBmMTZkOTVmZmFmMDE=|886f268c2406b6999cf54f9de1306acec57e5c561a6e0a236e250960ba28f687"; _xsrf=e4f6faad-ed17-446a-8204-4cb57102e017; l_cap_id="NDJjZWY5MmQ5ZTFiNGNjZDhjZjUzMmQ1NmJiYjhlOWQ=|1503058417|f58fafeb509b28e04a3da1de781d4fb5ad9c935d"; r_cap_id="MmRiNmU0MGZhMWQ3NDYzOGE1NDU5NDU4ZDZlMzk0Y2Y=|1503058417|7a376b5ad222e073b75dd75d355114b864830b48"; cap_id="YzRmNzUzMzFjZjA0NGU4YTgwZjY0MDUzMzQwMzdkYTg=|1503058417|c113ad690d27d1443d711a988fee45ae86e450bb"; __utma=51854390.1774224497.1495696572.1503043822.1503058418.10; __utmb=51854390.0.10.1503058418; __utmc=51854390; __utmz=51854390.1503058418.10.8.utmcsr=zhihu.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utmv=51854390.000--|2=registration_date=20151025=1^3=entry_date=20170403=1
Host:www.zhihu.com
Origin:https://www.zhihu.com
Referer:https://www.zhihu.com/
User-Agent:Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Mobile Safari/537.36
X-Requested-With:XMLHttpRequest
X-Xsrftoken:e4f6faad-ed17-446a-8204-4cb57102e017'''
    headers = get_headers(header)

    # Step 1: load the sign-in page. A timeout keeps the script from
    # hanging forever if the server never answers.
    r = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')

    # Step 2: pull the _xsrf token out of the page's hidden form input.
    xsrf_input = soup.find('input', attrs={'name': '_xsrf'})
    if xsrf_input is None:
        # Fail with a readable message instead of a bare IndexError when the
        # page did not contain the expected input.
        raise SystemExit('no <input name="_xsrf"> found in the sign-in page '
                         '(HTTP status %s)' % r.status_code)
    _xsrf = xsrf_input['value']

    # The same token is sent both as a form field and as a request header.
    dict_data['_xsrf'] = _xsrf
    headers['X-Xsrftoken'] = _xsrf

    url2 = 'https://www.zhihu.com/login/phone_num'

    # Wait for Enter before sending the login request (Python 2 builtin).
    raw_input()

    # Step 3: submit the login form and show what the server replied.
    login_resp = requests.post(url=url2, headers=headers, data=dict_data,
                               timeout=10)
    print(login_resp.status_code)
    print(login_resp.content)
    print(login_resp.json()['msg'])
错误信息如下
200
{
"r": 1,
"errcode": 100030,
"data": {"account":"\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"},
"msg": "\u767b\u5f55\u8fc7\u4e8e\u9891\u7e41\uff0c\u8bf7\u7a0d\u540e\u91cd\u8bd5"
}
登录过于频繁,请稍后重试
我在浏览器上用同一个账号可以正常登录,并且我的程序每次运行只发送了一个登录请求,为什么会提示登录过于频繁呢?求大佬帮忙分析一下。header 我反复修改过,似乎都没有用处,不知道该怎么办了。
你先把头写得跟浏览器一模一样再说吧。
知乎的登录,除了验证码,没有其它搞怪的机制的。