#!/usr/bin/env python3
#coding=utf-8
import scrapy,os
import re,time,json
from lxml import etree
# import yt_common
import hashlib
class WechatSpider(scrapy.Spider):
    """Spider that attempts to log in to the WeChat Official Account platform.

    Request chain implemented by this class:

    1. ``start_requests`` posts the account name and the MD5 hex digest of
       the password to the ``bizlogin`` endpoint.
    2. ``wechat_login`` reads ``redirect_url`` from the reply and requests
       the platform home page.
    3. ``get_cookie`` extracts the numeric ``token`` from that page.
    4. ``get_fakeid`` and ``parse`` build and log an article-list query.
       Nothing in this class currently schedules ``get_fakeid``; the request
       that would do so is still disabled in ``get_cookie``.
    """

    name = 'wechat_official_account'
    # Was 'wexin.qq.com' (typo), which does not match mp.weixin.qq.com.
    allowed_domains = ['weixin.qq.com']
    start_urls = ['https://mp.weixin.qq.com/']

    BASE_URL = 'https://mp.weixin.qq.com'
    USER_AGENT = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/67.0.3396.62 Safari/537.36"
    )

    # SECURITY: credentials are committed in source. They are kept only as
    # backward-compatible defaults; prefer passing them at run time with
    # ``scrapy crawl wechat_official_account -a username=... -a password=...``.
    DEFAULT_USERNAME = "leehome1989@sina.com"
    DEFAULT_PASSWORD = 'sw892130'

    def __init__(self, *args, username=None, password=None, **kwargs):
        """Initialise the spider.

        Args:
            username: Login name; falls back to ``DEFAULT_USERNAME``.
            password: Plain-text password; falls back to ``DEFAULT_PASSWORD``.
            *args, **kwargs: Forwarded to ``scrapy.Spider.__init__`` so that
                spider arguments keep working.
        """
        super().__init__(*args, **kwargs)
        self.username = username if username is not None else self.DEFAULT_USERNAME
        self.password = password if password is not None else self.DEFAULT_PASSWORD
        self.official_account = os.path.join(os.getcwd(), "official_account.text")
        # Headers used by get_fakeid; the original read self.headers without
        # ever defining it.
        self.headers = {"User-Agent": self.USER_AGENT}

    def start_requests(self):
        """Yield the login POST carrying the username and hashed password."""
        login_url = self.BASE_URL + '/cgi-bin/bizlogin?action=startlogin'
        headers = {
            "Referer": self.BASE_URL + "/cgi-bin/loginpage?t=wxm2-login&lang=zh_CN",
            "User-Agent": self.USER_AGENT,
        }
        # hashlib needs bytes, so the password is encoded before hashing.
        password_digest = hashlib.md5(self.password.encode('utf-8')).hexdigest()
        form_data = {
            "username": self.username,
            "pwd": password_digest,
            "f": "json",
            "ajax": "1",
            "lang": "zh_CN",
        }
        yield scrapy.FormRequest(
            url=login_url,
            formdata=form_data,
            headers=headers,
            callback=self.wechat_login,
        )

    def wechat_login(self, response):
        """Handle the login reply and request the platform home page.

        The ``redirect_url`` found in the reply is only used as the
        ``Referer`` of the follow-up request; the request itself targets the
        platform root.
        """
        self.logger.debug("login response: %s", response.text)
        found = re.search(r'redirect_url":"(.*?)"', response.text)
        if found is None:
            self.logger.error("no redirect_url in login response; login failed?")
            return
        referer = self.BASE_URL + found.group(1)
        headers = {
            "Referer": referer,
            "User-Agent": self.USER_AGENT,
            # Header names must not contain spaces.
            "Upgrade-Insecure-Requests": "1",
        }
        yield scrapy.Request(
            url=self.BASE_URL + '/',
            callback=self.get_cookie,
            dont_filter=True,
            headers=headers,
        )

    def get_cookie(self, response):
        """Log the cookies that were sent and the token found in the page."""
        self.logger.debug("home page response: %s", response.text)
        cookies = response.request.headers.getlist('Cookie')
        self.logger.debug("request cookies: %s", cookies)
        found = re.search(r'token=(\d+)', response.text)
        if found is None:
            self.logger.error("no token found in page; the session is probably not logged in")
            return
        token = found.group(1)
        self.logger.info("token: %s", token)
        # TODO: the follow-up request below is disabled; `url` was never
        # defined, and get_fakeid expects the token in response.meta['meta'].
        # headers={"Host": "mp.weixin.qq.com",
        #          "Referer": "https://mp.weixin.qq.com/cgi-bin/bizlogin?action=validate&lang=zh_CN&account=leehome1989%40sina.com",
        #          "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.3"}
        # yield scrapy.Request(url=url, callback=self.get_fakeid, dont_filter=True, headers=headers,cookies=cookies)

    def get_fakeid(self, response):
        """Extract ``fakeid`` from the response and request the article list.

        NOTE(review): the token is read from ``response.meta['meta']``; no
        visible caller sets that key, so whoever schedules this callback must
        pass ``meta={'meta': token}`` - confirm the intended key name.
        """
        token = response.meta['meta']
        self.logger.debug("fakeid response: %s", response.text)
        found = re.search(r'fakeid":"(.*)==",', response.text)
        if found is None:
            self.logger.error("no fakeid found in response")
            return
        fakeid = found.group(1)
        self.logger.info("fakeid: %s", fakeid)
        url = (
            'https://mp.weixin.qq.com/cgi-bin/appmsg?token={}&lang=zh_CN&f=json&ajax=1'
            '&action=list_ex&begin=0&count=5&query=&fakeid={}%3D%3D&type=9'
        ).format(token, fakeid)
        yield scrapy.Request(
            url=url,
            callback=self.parse,
            dont_filter=True,
            headers=self.headers,
        )

    def parse(self, response):
        """Log the body of the article-list response."""
        self.logger.debug("article list response: %s", response.text)

    # Backward-compatible alias for the original misspelled method name.
    pasrse = parse
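# Note: this logs in through the WeChat Official Account web interface. The
# token on that page is only issued after the QR code has been scanned, so
# requesting the page this way never returns the logged-in page.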