爬取微博信息,使用了cookie仍然无法登录微博

新手上路,请多包涵

按照网上的模板自己写了类似的代码来爬取微博,可是 response 返回的 html 是登录界面的 html,应该是没有成功登录微博,但我的代码和网上的代码基本是一样的。

from bs4 import BeautifulSoup
import pandas as pd
import requests
url = "https://weibo.cn/2610622321/info"
# Raw value of the browser's "Cookie" request header for a logged-in session.
# NOTE: this is a login credential -- never publish a live session cookie.
raw_cookie = "SUB=_2A25xtQZjDeRhGeVM6VQV8CvIyjuIHXVTWaorrDV6PUJbkdAKLW-ikW1NTRxe3ZqwS1KogOneF6bXUXz7lkhb7SGJ; SUHB=0QSpzHMgP-YPEV; SCF=AisDqT2yt7Pkh8gy_7C80NumATyJngEuCniBti09uOwx-8_0IHEkgoGo9HSTyrsBm2J-UMjtDw_DbAEbO-jH2xA.; _T_WM=2901aedc3b6c36f819fe83923328067d"
# requests' `cookies=` argument maps each cookie NAME to its VALUE.  Putting
# the whole header string under one key ("Cookies") makes requests send
# "Cookie: Cookies=SUB=...; SUHB=...", so the first pair (SUB here) is no
# longer delivered under its own name.  Split the header into one dict entry
# per cookie instead.
cook = {}
for pair in raw_cookie.split(";"):
    # Split on the first "=" only, in case a value itself contains "=".
    name, sep, value = pair.strip().partition("=")
    if sep:
        cook[name] = value
html = requests.get(url, cookies=cook).content
soup = BeautifulSoup(html, 'lxml')
print(soup)

可结果获取的html是这样的

<!DOCTYPE html>
<html style="height:100%">
<head>
<meta charset="utf-8"/>
<link href="http://h5.sinaimg.cn" rel="dns-prefetch"/>
<link href="http://u1.sinaimg.cn" rel="dns-prefetch"/>
<link href="http://ww1.sinaimg.cn" rel="dns-prefetch"/>
<link href="http://ww2.sinaimg.cn" rel="dns-prefetch"/>
<link href="http://ww3.sinaimg.cn" rel="dns-prefetch"/>
<link href="http://ww4.sinaimg.cn" rel="dns-prefetch"/>
<meta content="yes" name="apple-mobile-web-app-capable"/>
<meta content="black" name="apple-mobile-web-app-status-bar-style"/>
<link href="http://u1.sinaimg.cn/upload/h5/img/apple-touch-icon.png" rel="apple-touch-icon"/>
<title>登录 - 新浪微博</title>
<meta content="width=device-width,initial-scale=1.0,minimum-scale=1.0,maximum-scale=1.0,minimal-ui" id="viewport" name="viewport"/>
<link href="/css/weibo/signin/login.css?id=20141028160201" rel="stylesheet"/>
</head>
<body style="height:100%">
<div class="login-wrapper" id="loginWrapper" style="display:none">
<!-- 登陆成功跳转页 -->
<input id="loginSuccessAddress" name="url" type="hidden" value="https%3A%2F%2Fweibo.cn"/>
<!-- 登陆rf标识 -->
<input id="loginRF" name="rf" type="hidden"/>
<input id="loginRFCAL" name="rfcal" type="hidden"/>
<input id="oldUserName" type="hidden"/>
<input id="loginfrom" type="hidden" value=""/>
<input id="featurecode" type="hidden" value=""/>
<input id="hff" name="hff" type="hidden" value=""/>
<input id="hfp" name="hfp" type="hidden" value=""/>
<a class="close" href="javascript:history.go(-1);">关闭</a>
<section class="avatar-wrapper" id="avatarWrapper">
<!-- 头像 -->
<img onerror="this.src='/images/weibo/signin/default-avatar_2x.png';" src=""/>
</section>
<form>
<section class="box">
<div class="input-wrapper">
<i class="icon forName"></i>
<!-- 用户名 -->
<p class="input-box">
<input id="loginName" placeholder="邮箱/手机号" type="text"/>
<!-- 清除用户名小叉 -->
<a class="input-clear" href="javascript:;" id="loginnameclear" style="display:none"></a>
</p>
</div>
<div class="input-wrapper">
<i class="icon forPwd"></i>
<p class="input-box"><input id="loginPassword" placeholder="请输入密码" type="password"/></p>
</div>
</section>
<!-- 要隐藏添加 hid -->
<div id="verifyCodeWrapper" style="display:none">
<p class="vcode">
<img alt="" id="verifyCodeImage" src=""/>
<a href="javascript:;" id="changeVerifyCode">换一张</a>
</p>
<!-- 要隐藏添加 hid -->
<div class="box">
<input id="loginVCode" placeholder="请输入验证码" type="text"/>
</div>
</div>
<!-- 要隐藏添加 hid -->
<div id="dVerifyCodeWrapper" style="display:none">
<p class="shield-tit">动态码</p>
<!-- 要隐藏添加 hid -->
<div class="box">
<input id="loginDVCode" name="shieldCode" placeholder="请输入微盾动态码" type="text"/>
</div>
</div>
<div class="error-label" id="errorMsg" style="display:none">手机号不能为空</div>
<a class="btn btnRed" href="javascript:;" id="loginAction">登录</a>
</form>
<p class="label">
<a href="https://passport.weibo.cn/signin/other?r=https%3A%2F%2Fweibo.cn">第三方帐号</a>
</p>
<footer class="footer">
<a href="https://m.weibo.cn/reg/index?&amp;vt=4&amp;wm=3349&amp;wentry=&amp;backURL=https%3A%2F%2Fweibo.cn">注册帐号</a><a href="https://passport.weibo.cn/forgot/forgot?entry=wapsso&amp;from=0&amp;r=https%3A%2F%2Fm.weibo.cn%2F">忘记密码</a>
</footer>
</div>
<!-- 要隐藏添加 hid -->
<div class="popup" id="errorDialog" style="display:none">
<article class="wrapper" id="errorDialogPanel" style="top:180px;">
<header class="title"></header>
<span class="info" id="errorDialogMsg">帐号或密码错误,你也可以选择短信验证码方式登录微博。</span>
<div class="action">
<a class="btn cancel" href="javascript:;" id="errorDialogBtnF">取消</a>
<a class="btn confirm" href="javascript:;" id="errorDialogBtnT">验证码登录</a>
</div>
</article>
</div>
<div class="account-wrapper" id="accountWrapper" style="display:none">
<a class="close" href="javascript:history.go(-1);">关闭</a>
<section class="avatar-wrapper">
<!-- 头像 -->
<img alt="" id="ucavatar" onerror="this.src='/images/weibo/signin/default-avatar_2x.png';" src="/images/weibo/signin/default-avatar_2x.png"/>
</section>
<p class="label" id="uctext">使用<span id="ucname"></span>的身份登录网页版微博</p>
<p class="label" id="weibotext">检测到您已在微博客户端登录<br/>是否获取该登录状态?</p>
<a class="btn btnRed" href="javascript:;" id="ucORweiboLogin">确认</a>
<a class="btn btnWhite" href="javascript:;" id="changeLogin">使用其他帐号登录</a>
</div>
<form action="https://open.weibo.cn/oauth2/authorize" id="postform" method="post">
<button style="display:none" type="submit"></button>
</form>
<script type="text/javascript">
    var fEntry = '';
    var fWentry = '';
    var fBackUrl = '';

        fEntry = 'mweibo';
    
    
        fBackUrl = 'https%3A%2F%2Fweibo.cn';
        var fClientid = '';
        var fCode = '';
        var fQq = '';
    </script>
<!-- 登录wifi -->
<script charset="gb2312" src="/js/signin/req.js?v=20181220" type="text/javascript"></script>
<script charset="gb2312" src="/js/signin/weibologin.js?v=20181220" type="text/javascript"></script>
<script type="text/javascript">
    //记住用户名的时候需要把mode设置为1
    

    
    //这里是需要微盾
    </script>
</body>
</html>

请过来人解答。是需要模拟登录吗?可是我看网上的教程都不需要模拟登录啊。

阅读 4.3k
2 个回答

过来人回答一波,这是前年我用过的爬虫,不知道还有没有用了,你可以试下,爬微博最好用u端web,网页简单好爬。

# Placeholders: replace the right-hand sides with the numeric id of the user
# to crawl and with the raw Cookie header string copied from your browser.
user_id = 你要爬取对象的id(此ID非彼ID)
cookie = {"Cookie": 你的cookie}
url = 'http://weibo.cn/u/%d'%user_id
urllist_set = set()
# `cookie` holds the complete Cookie *header*, so send it as a header.
# Passing it through `cookies=` would make requests treat "Cookie" as the
# name of a single cookie and mangle the first name=value pair.
r=requests.get(url,headers=cookie)
code=r.status_code
# A 200 status alone does not prove the cookie worked: the login page is also
# a normal response.  Look for the login page's <title> text as well.
page=r.content.decode('utf-8','ignore')
if code==200 and u"登录 - 新浪微博" not in page:
    print(u"id和cookie加载正确")
else:
    print(u"id和cookie加载失败!!!")

不用这么麻烦,登录微博的h5版本,
我原来写过这个,监控明星发的微博,一发就去评论,
一些依赖自己引入下,需要添加你的微博名跟密码:

#!/usr/bin/python
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import datetime
import sys
# Raise the interpreter's recursion limit far above the default.
# NOTE(review): presumably needed because the polling functions defined
# further down call each other recursively -- confirm before removing.
sys.setrecursionlimit(10000000)
# First characters of the last post that was handled, and the name of its
# author; both are module-level state shared with the functions below.
wbtext=''
wbname=''
# Headless PhantomJS browser; the executable path is machine-specific.
driver = webdriver.PhantomJS(executable_path = '/Users/shangfan/Downloads/phantomjs-2.1.1-macosx/bin/phantomjs',service_args=['--ignore-ssl-errors=true', '--ssl-protocol=TLSv1'])
# Phone-sized viewport for the mobile (H5) pages.
driver.set_window_size(411, 731)
urllogin = 'https://passport.weibo.cn/signin/login?entry=mweibo&res=wel&wm=3349&r=https%3A%2F%2Fm.weibo.cn%2F'
urlgz='https://m.weibo.cn/'
driver.get(urllogin)
# Give the login page time to load before looking up the form fields.
time.sleep(5)
# Start logging in
elem = driver.find_element_by_id("loginName")
elem.send_keys(u'')  # TODO: fill in your Weibo phone number
elem2 = driver.find_element_by_id("loginPassword")
elem2.send_keys(u'')  # TODO: fill in your Weibo password
print(u'开始登录微博')

login_tpye = driver.find_element_by_id('loginAction')
login_tpye.click()
# NOTE(review): this message is printed right after the click; nothing here
# verifies that the login actually succeeded.
print(u'登录成功')
time.sleep(10)
def printme():
    """Poll the followed-feed page forever and react to new posts.

    Each round reloads ``urlgz``, waits until at least one ``weibo-text``
    element is present, stores the second ``m-text-cut`` element's text in the
    global ``wbname`` and compares the first five characters of the first
    post with the global ``wbtext``.  When they differ, ``wbtext`` is updated
    and ``oneHour()`` is called to post a comment.

    The function loops instead of calling itself, so the call stack does not
    grow with every refresh.  It never returns.
    """
    global wbtext
    global wbname
    while True:
        # Interval between refreshes of the followed-feed page
        time.sleep(10)
        # Reload the followed-feed page
        driver.get(urlgz)
        time.sleep(3)
        # Wait until at least one post has been rendered
        while not driver.find_elements_by_class_name('weibo-text'):
            print('页面没有加载完')
            time.sleep(1)
        # Name of the author
        wbname = driver.find_elements_by_class_name('m-text-cut')[1].text
        print(wbname)
        # First five characters of the first post in the feed
        text_wb = driver.find_elements_by_class_name('weibo-text')[0]
        endstr = text_wb.text[0:5]
        print(endstr)
        if wbtext != endstr:
            # New post (always true on the first round): comment on it
            wbtext = endstr
            oneHour()
        else:
            # Already commented on this post
            print('已经评论过了,继续刷新页面')
        print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        print('===============分割线==================')

def oneHour():
    """Open the first post in the feed and submit a comment on it.

    Clicks the first ``weibo-text`` element, clicks the comment button, types
    the global ``wbname`` followed by a fixed phrase into the first
    ``textarea`` element and clicks the first ``btn-send`` element.  If an
    ``m-dialog`` element is present afterwards, the comment is reported as not
    allowed.

    Returns to the caller when done rather than calling ``printme()`` again,
    so the call stack does not grow with every comment.
    """
    print('有更新啦,现在你可以去评论啦')
    # Open the post's detail view
    wapper_wb = driver.find_elements_by_class_name('weibo-text')[0]
    wapper_wb.click()
    time.sleep(1)
    # Click the comment button
    go_pl = driver.find_element_by_xpath('//*[@id="app"]/div[1]/div/div[4]/div/div[1]')
    go_pl.click()
    time.sleep(1)
    # Type the comment text
    pl_area = driver.find_elements_by_class_name('textarea')[0]
    pl_area.send_keys(wbname)
    pl_area.send_keys(u'我是沙发')
    # Send the comment
    btn_senf = driver.find_elements_by_class_name('btn-send')[0]
    btn_senf.click()
    # A dialog after sending is taken to mean that commenting is not allowed
    if driver.find_elements_by_class_name('m-dialog'):
        print('作者设置不能评论')
        # Return early so the success message below is not printed
        return
    print('评论成功')
    time.sleep(2)
# Entry point: start polling the followed-feed page (does not return under
# normal operation).
printme()
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题