Python crawler for scraping Zhihu questions

import requests
from bs4 import BeautifulSoup

url = 'http://www.zhihu.com/#signin'
url1 = 'http://www.zhihu.com/login/email'
url2 = 'http://www.zhihu.com/'

ans = requests.get(url)
soup = BeautifulSoup(ans.content, 'html.parser')  # name a parser explicitly
# match the hidden input by name, so an unrelated hidden field isn't picked up
_xsrf = soup.find('input', attrs={'name': '_xsrf'})['value']
print(_xsrf)

postdata = {'_xsrf': _xsrf,
            'password': '000000000',
            'email': '000000000'}

headers = {
    'Host': 'www.zhihu.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'http://www.zhihu.com/',
    # a hardcoded If-None-Match ETag from an old session was dropped here:
    # replaying it can provoke a 304 Not Modified with an empty body
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive'
}

ans1 = requests.post(url1, data=postdata, headers=headers, cookies=ans.cookies)
ans2 = requests.get(url2, cookies=ans1.cookies)
print(ans2.text)
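As a side note, matching the hidden input by its name attribute is more robust than grabbing the first type="hidden" input, since a sign-in page may carry several hidden fields. A self-contained sketch against sample HTML (the snippet is made up for illustration, not Zhihu's actual markup):

```python
from bs4 import BeautifulSoup

# Sample HTML standing in for the sign-in page (invented for illustration)
html = '''
<form>
  <input type="hidden" name="utm_source" value="tracking">
  <input type="hidden" name="_xsrf" value="abc123">
</form>
'''

soup = BeautifulSoup(html, 'html.parser')
# find('input', type='hidden') would return the tracking field here;
# matching on the name attribute targets the token directly
xsrf = soup.find('input', attrs={'name': '_xsrf'})['value']
print(xsrf)  # abc123
```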

3 answers

OP, your approach is too low-level; you should just switch to Scrapy.

First clear your browser cookies; the login page will then present a captcha. Fetch the captcha from its URL, include its text in the form you submit, and the login will succeed.
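That flow could be sketched with a requests.Session, which carries cookies across requests automatically. The captcha endpoint (`captcha.gif`) and the `captcha` form field are assumptions about how such login forms typically work, not a confirmed Zhihu API, and the `_xsrf` token would be extracted from the sign-in page as in the question's code:

```python
import requests


def build_login_payload(xsrf, email, password, captcha):
    # Assemble the login form fields; 'captcha' is the assumed field name
    return {'_xsrf': xsrf, 'email': email,
            'password': password, 'captcha': captcha}


def login(email, password, xsrf):
    # One Session keeps the cookies from every step of the exchange
    s = requests.Session()
    s.get('http://www.zhihu.com/#signin')

    # Hypothetical captcha endpoint: save the image, read it by hand
    img = s.get('http://www.zhihu.com/captcha.gif')
    with open('captcha.gif', 'wb') as f:
        f.write(img.content)
    captcha = input('captcha text: ')

    payload = build_login_payload(xsrf, email, password, captcha)
    return s.post('http://www.zhihu.com/login/email', data=payload)
```

Calling `login('you@example.com', 'secret', xsrf)` then returns the response of the login POST, and further requests on the same Session stay logged in.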
