import re
import random
import requests
rendom_num = random.random()
post_url = 'http://navi.cnki.net/knavi/Common/Search/NPaper'
pages = 1
post_data = {
'SearchStateJson':{"StateID":"","Platfrom":"","QueryTime":"","Account":"knavi","ClientToken":"","Language":"","CNode":{"PCode":"CCND","SMode":"","OperateT":""},"QNode":{"SelectT":"","Select_Fields":"","S_DBCodes":"","QGroup":[{"Key":"Navi","Logic":1,"Items":[],"ChildItems":[{"Key":"NPaper","Logic":1,"Items":[{"Key":1,"Title":"","Logic":1,"Name":"地区代码","Operate":"","Value":"0001?","ExtendType":0,"ExtendValue":"","Value2":""}],"ChildItems":[]}]}],"OrderBy":"RT|","GroupBy":"","Additon":""}},
'displaymode':1,
'pageindex':4,
'pagecount':21,
'index':pages,
'random':rendom_num
}
header = {
'Cookie':'UM_distinctid=160cf4141da790-0ba2bdda54b9fd-454f032b-ff000-160cf4141db7f0; Ecp_IpLoginFail=18010761.190.32.52; cnkiUserKey=0a9ae521-a984-c34f-9439-d2d12fa85751; ASP.NET_SessionId=133ge3vm32yrtztdrvlyycb1; SID_navi=120161; KNET_SSO_COOKIE_CHECK=2018-01-07 15:23:59; LID=admin$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4ggI8Fm4gTkoUKaID8j8gFw!!',
'Referer':'http://navi.cnki.net/KNavi/NPaper.html',
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36',
'X-Requested-With':'XMLHttpRequest'
}
content = requests.post(url=post_url, data=post_data, headers=header).text
print(content)
以上,返回结果是 <?xml version="1.0" encoding="utf-16"?>
我看了请求头信息,网页编码是 text/html
但是返回结果为啥是 xml 的呢? 还就返回这一句话,其它啥都没有,会不会是随机值的原因?
你这个问题在post_data里面
这一块的数据应该写成字符串格式,否则requests对post_data数据进行解析的时候会出偏差
或者你可以用
data = urllib.parse.urlencode(post_data)
这样预处理一下再post