本人正在写一个 Selenium 爬虫的网页控制面板,用 Flask 控制 Selenium 的一些行为。目标网页需要扫描二维码登录,现在卡在扫码登录完成之后的阶段:我想不出如何在登录后对登录状态进行轮询,并把结果返回给 Flask,进而触发网页元素的变化。具体代码如下:
from flask import Flask,render_template,request,redirect,url_for
from Control import control
from flask import session,escape
from flask_socketio import SocketIO
# Flask application object that the routes below are registered on.
app = Flask(__name__)
# Flask signs the session cookie used by the views with this key.
# NOTE(review): the key is hard-coded in source; consider loading it from
# the environment before deploying.
app.config.update(SECRET_KEY='dqwer235r*tbqew4r1$1232~@')
# Socket.IO wrapper around the app; it is also what starts the server.
socketio = SocketIO(app)
# Single shared browser controller, created once when this module is loaded.
test = control()
@app.route('/', methods=['GET', 'POST'])
def login():
    """Render the login page and handle the login form.

    GET renders ``homepage.html``. POST compares the submitted
    ``username``/``password`` form fields with the built-in admin account;
    on a match it stores ``username`` in the session and redirects to the
    dashboard, otherwise it re-renders the page with an ``error`` message.
    """
    if request.method != 'POST':
        return render_template('homepage.html')

    # .get() treats a missing field as a failed login instead of raising,
    # so no catch-all ``except Exception`` is needed around the view.
    uname = request.form.get('username', '')
    passwd = request.form.get('password', '')

    # NOTE(review): credentials are hard-coded; confirm this is intended
    # only for local use.
    if uname == 'admin' and passwd == '123456':
        session['username'] = 'admin'
        return redirect(url_for('dashboard'))

    # Tell the user the attempt failed instead of silently re-rendering.
    return render_template('homepage.html',
                           error='Invalid username or password.')
@app.route('/dashboard/', methods=['GET', 'POST'])
def dashboard():
    """Control panel view: start the QR login and report its status.

    Every request must come from a session logged in as ``admin``;
    otherwise the client is redirected to the login page.

    POST with ``submit=Start`` asks the browser controller for the QR code
    (or, if one was already issued in this session, for the current login
    status) and renders it. Any other POST renders the message ``'no'``.

    GET arriving from the login page re-checks the login status when a QR
    code was already issued; all other GETs render the bare dashboard.
    """
    # Authenticate first, for every method, so neither the POST actions nor
    # a GET with an unexpected Referer can reach the controller anonymously.
    if session.get('username') != 'admin':
        return redirect(url_for('login'))

    qr_issued = session.get('QRstatus') == 'True'

    if request.method == 'POST':
        # .get() avoids an error response when the field is absent.
        if request.form.get('submit') != 'Start':
            return render_template('dashboard.html', msg='no')
        if qr_issued:
            return render_template('dashboard.html',
                                   msg='QR code was printed.',
                                   qrSrc=test.ck_login())
        qr_src = test.qr()
        if qr_src is None:
            # qr() produced no image: do not mark the QR as issued and do
            # not hand the literal string 'None' to the template.
            return render_template('dashboard.html',
                                   msg='Failed to load QR code.')
        session['QRstatus'] = 'True'
        return render_template('dashboard.html', msg='Started.',
                               qrSrc=str(qr_src))

    # NOTE(review): the Referer is compared with a hard-coded origin, so the
    # status check is skipped when the panel is opened via another host name
    # or port. Confirm whether that is intended.
    from_login_page = request.headers.get('Referer') == 'http://127.0.0.1/'
    if request.method == 'GET' and from_login_page and qr_issued:
        # Call ck_login() once: each call may block while waiting on the
        # browser, and two calls could return different results.
        status = test.ck_login()
        if status:
            return render_template('dashboard.html', msg='Logined.',
                                   qrSrc=status)
        session['QRstatus'] = 'False'

    # TODO: Add not login status
    return render_template('dashboard.html')
if __name__ == "__main__":
    # Start the app through the Socket.IO runner on port 80.
    socketio.run(app, port=80)
Selenium 部分(即上面导入的 Control 模块):
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException,StaleElementReferenceException
class img_url_match(object):
    """Wait condition: an element's ``src`` attribute starts with a prefix.

    Instances are callables that take a driver, which is the shape
    ``WebDriverWait.until`` expects.

    Args:
        locator: ``(by, value)`` pair identifying the element.
        text_: Prefix the element's ``src`` attribute must start with.
    """

    def __init__(self, locator, text_):
        self.locator = locator
        self.text = text_

    def __call__(self, driver):
        """Return True when the located element's ``src`` has the prefix.

        Returns False when the element went stale or has no ``src``
        attribute. Other lookup errors raised by ``driver.find_element``
        propagate to the caller.
        """
        try:
            # Use the public find_element API rather than the private
            # EC._find_element helper, which is not part of selenium's
            # documented interface.
            img_src = driver.find_element(*self.locator).get_attribute('src')
        except StaleElementReferenceException:
            return False
        # get_attribute() yields None when the attribute is absent; treat
        # that as "not matched yet" instead of failing on None.startswith.
        return img_src is not None and img_src.startswith(self.text)
class control:
    """Drives one Chrome session through the target site's QR-code login."""

    def __init__(self):
        # Launches a Chrome browser; every method below reuses this driver.
        self.driver = webdriver.Chrome()

    def qr(self, max_attempts=3):
        """Open the login page and return the QR code image source.

        Args:
            max_attempts: Number of 2-second waits for the QR image. The
                page is refreshed after each wait that times out.

        Returns:
            The ``src`` attribute of the ``js_login_qrcode_img`` element once
            it starts with ``data:image/gif;base64``, or None when the image
            did not appear within ``max_attempts`` waits.
        """
        self.driver.get('http://example.com')
        qr_ready = img_url_match((By.ID, 'js_login_qrcode_img'),
                                 'data:image/gif;base64')
        for _ in range(max_attempts):
            try:
                # until() either returns a truthy value or raises
                # TimeoutException, so no falsy-result branch is needed.
                WebDriverWait(self.driver, 2).until(qr_ready)
            except TimeoutException:
                self.driver.refresh()
                continue
            qr_img = self.driver.find_element(By.ID, 'js_login_qrcode_img')
            return qr_img.get_attribute('src')
        return None

    def ck_login(self):
        """Report whether the QR login has completed.

        Returns:
            A data URI holding a screenshot of the browser window once the
            browser has left the login page, or False when the wait timed
            out or the current URL is not one this method knows about.
        """
        # Read the URL once so every comparison sees the same value.
        current = self.driver.current_url

        if current == 'http://example.com/home/':
            return self._screenshot_data_uri()

        if current == 'http://example.com':
            # Still on the login page: wait for the post-scan navigation.
            condition = EC.url_changes('http://example.com')
            timeout = 25
        elif current == ':data:,':
            # NOTE(review): ':data:,' looks like a typo for Chrome's blank
            # start URL 'data:,'. Left unchanged until the intent is
            # confirmed.
            print('Now the url is :' + current)
            condition = EC.presence_of_element_located(
                (By.ID, 'js-ch-member-face'))
            timeout = 5
        else:
            return False

        try:
            WebDriverWait(self.driver, timeout).until(condition)
        except TimeoutException:
            # Report "not logged in" for every timed-out wait instead of
            # letting the exception escape from one of the branches.
            return False
        return self._screenshot_data_uri()

    def _screenshot_data_uri(self):
        """Return a screenshot of the current window as a PNG data URI."""
        # WebDriver screenshots are PNG encoded, so label them image/png
        # rather than image/gif.
        return ('data:image/png;base64,'
                + self.driver.get_screenshot_as_base64())
if __name__ == '__main__':
    # Manual smoke test: print the QR code source of a fresh login page.
    test = control()
    try:
        print(test.qr())
    finally:
        # Close the browser and stop the driver so the script does not
        # leave a Chrome session running after it exits.
        test.driver.quit()
现在我打算用协程或者 subprocess 来轮询 Selenium 的某些状态,但不知道这样做会不会启动另一个新的 Selenium 实例,即另开一个新的浏览器?另外,改用 Tornado 会不会好一些?
谢谢大家。
目前的解决方案是:在网页端用 setInterval 结合 Socket.IO 进行状态判断。