我在实现一个简单的需求:验证邮箱是否已经注册。向目标服务器发送一个请求后,返回 204 表示未注册,返回 409 表示已注册。待检测的邮箱存放在 txt 文件中,读取后作为列表遍历执行。我使用 requests 实现,并使用了代理,但是一个一个地发送请求太慢,请问如何用多线程并发执行?
- 如何使用多线程
- 对于待检测的邮箱,如何避免多个线程同时检测同一个邮箱或重复检测?希望每个线程只取到尚未检测的邮箱,已检测的邮箱不会再被读取。是否应该使用队列实现?
下面的是代码,部分信息打码。
import requests
import time
import json
class Validator(object):
    """Check whether an email address is registered with the target service.

    The headers, query parameters and target URL are empty in this file;
    they have to be filled in before the check can reach a real server.
    """

    # HTTP status code -> label used in the result line.
    _STATUS_LABELS = {204: '未注册', 409: '已注册'}

    def __init__(self):
        # Headers and query parameters sent with every check request.
        self.headers = {
        }
        self.params = (
        )

    def run(self, name, proxies):
        """POST one email address to the service and describe the answer.

        Args:
            name: the email address to check.
            proxies: proxies mapping handed to ``requests.post`` (may be None).

        Returns:
            A result line containing the address, a label (204 -> not
            registered, 409 -> registered, anything else -> abnormal) and
            the HTTP status code.

        Raises:
            requests.exceptions.RequestException: propagated from
                ``requests.post`` (timeout after 9 seconds, connection
                errors, ...).
        """
        # Let json.dumps build the body so quotes and backslashes in the
        # address are escaped; the compact separators keep the body identical
        # to the old hand-formatted string for ordinary addresses.
        data = json.dumps({"emailAddress": name}, separators=(',', ':'))
        url = ''
        response = requests.post(
            url=url,
            headers=self.headers,
            params=self.params,
            data=data,
            timeout=9,
            proxies=proxies,
        )
        label = self._STATUS_LABELS.get(response.status_code, '检测异常')
        return '{} {} HTTP返回:{}'.format(name, label, response.status_code)
# Read the accounts to be checked as a list
def get_list(path='unvalidated.txt'):
    """Return the email addresses stored in *path*, one per line.

    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are skipped so that a blank line in the file does
    not turn into a request for an empty address.

    Args:
        path: text file to read; defaults to ``unvalidated.txt``.

    Returns:
        List of non-empty, stripped lines in file order.
    """
    with open(path, 'r') as f:
        stripped = (line.strip() for line in f)
        return [email for email in stripped if email]
# Fetch a proxy
def get_proxy(retry=3, timeout=9):
    """Ask the proxy API for a proxy and return it as a ``proxies`` mapping.

    An attempt counts as failed when the API answers with a ``code`` other
    than ``'0'`` or with an empty ``obj`` list; the function then waits
    0.5 seconds and tries again.

    Args:
        retry: number of extra attempts after the first failed one.
        timeout: seconds to wait for the proxy API; without it a stalled
            API would block the caller indefinitely.

    Returns:
        ``{'http': 'http://<ip>:<port>'}`` built from the first entry of
        ``obj``, or None when every attempt failed.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (including the timeout).
    """
    attempts = 0
    while attempts <= retry:
        res = requests.get("", timeout=timeout)
        dic_info = json.loads(res.text)
        # Only a '0' code carries usable data; an empty list is treated like
        # a failed attempt instead of raising IndexError on data[0].
        data = dic_info.get('obj') if dic_info['code'] == '0' else None
        if not data:
            attempts += 1
            time.sleep(0.5)
            continue
        ip_port = 'http://{}:{}'.format(data[0]["ip"], data[0]["port"])
        # NOTE(review): only the 'http' scheme is mapped; confirm whether the
        # target URL is https, in which case this proxy would not be used.
        return {'http': ip_port}
    return None
if __name__ == '__main__':
    app = Validator()
    # One proxy is fetched up front and reused for every address.
    proxies = get_proxy()
    # Check the addresses one after another.
    for email in get_list():
        # Timestamp shared by the success line and the error line.
        validating_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Only the request itself is guarded, so a network error on one
        # address does not stop the whole run.
        try:
            info = '检测时间:{} {}'.format(validating_time, app.run(email, proxies))
        except requests.exceptions.RequestException as e:
            info = '检测时间:{} {} 检测错误 错误信息:{}'.format(validating_time, email, e)
        print(info)
        # Append the line to the log file.
        with open('validate_log.txt', 'a+') as f:
            f.write(info + '\n')
        # Delay between two checks.
        time.sleep(0.5)
自己再改改吧