登陆知乎验证码问题?

代码如下

# -*- coding: utf-8 -*-

import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

import urllib
import urllib2
import cookielib
import re
import webbrowser

class ZhiHu:

    def __init__(self,):
        self.username = ''
        self.password = ''
        self.filename = 'cookie.txt'
        
        self.lgurl = 'http://www.zhihu.com/login/email'
        self.cookie = cookielib.MozillaCookieJar(self.filename)
        self.cookie_handler = urllib2.HTTPCookieProcessor(self.cookie)
        self.opener = urllib2.build_opener(self.cookie_handler)
        
    #第一次登陆得到cookie
    def firstlogin(self):
        zhihu = 'http://www.zhihu.com'
        request = urllib2.Request(zhihu)
        res = urllib2.urlopen(request)
        contents = res.read().decode('utf-8')
        #self.cookie.save(ignore_discard=True,ignore_expires=True)
        return contents
    #第二次登陆,返回登陆的getcode()
    def seclogin(self,contents):
        loginURL = self.lgurl
        pattern = re.compile(r'<input type="hidden" name="_xsrf" value="(.*?)"/>',re.S)
        result = re.findall(pattern,contents)
        xsrf = result[0]
        hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0'}
        post_data = {'_xsrf':xsrf,'email':self.username,'password':self.password,'rememberme':'y'}
        dt = urllib.urlencode(post_data)
        req = urllib2.Request(loginURL,dt,hdr)
        response = self.opener.open(req)
        page = response.read().decode('gbk')
        return response.getcode()
    #判断是否需要验证码
    def needIdenCode(self):
        home ='http://www.zhihu.com/#signin'
        sign = urllib2.urlopen(urllib2.Request(home))
        details= sign.read().decode('utf-8')
        contents = self.firstlogin()        
        status = self.seclogin(contents)
        if status==200:
            print "make it"
            #正则验证码,可能是这里出现问题
            pattern = re.compile(u'\u9a8c\u8bc1\u7081',re.S)
            result = re.search(pattern,sign)
            if result:
                print u'需要验证码'
                return sign
            else:
                print 'ok'
    #得到验证码图片。            
    def getpic(self,sgin):
        pattern = re.compile(r'<img class="js-captcha-img" width.*?src="(.*?)"></imhg>',re.S)
        items = re.search(patttern,sign)
        if items and items.goup(1):
            print items.group(1)
            return items.group(1)
        else:
            print 'false'
            return False


    def main(self):
        needResult = self.needIdenCode()
        if needResult == True:
            print 'input'
            idencode = self.getpic(needResult)

            if idencode ==True:
                print u"在浏览器中获取验证码"
                webbrowser.open_new_tab(idencode)
            else:
                print "shibai"

        else:
            print u"直接登陆"



start = ZhiHu()
start.main()

代码比较繁琐,刚开始写。出现的错误是

make it
Traceback (most recent call last):
  File "D:\pythonIDE\py\jiu.py", line 91, in <module>
    start.main()
  File "D:\pythonIDE\py\jiu.py", line 74, in main
    needResult = self.needIdenCode()
  File "D:\pythonIDE\py\jiu.py", line 56, in needIdenCode
    result = re.search(pattern,sign)
  File "C:\Python27\lib\re.py", line 146, in search
    return _compile(pattern, flags).search(string)
TypeError: expected string or buffer
[Finished in 1.2s with exit code 1]

请大家帮忙看一下,就快登陆成功了,谢谢。

阅读 3.3k
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进