本文从 https://www.rankedftw.com/ladder/lotv/1v1/mmr/ 爬取了星际争霸2天梯数据,并存储到本地的MySQL数据库中,共爬得32万条数据。
import requests
from bs4 import BeautifulSoup
import pymysql.cursors
from config import *
from multiprocessing import Pool
# Connect to the MySQL database.
# host, user, password, db and port are not defined in this file; they are
# expected to be provided by `from config import *`.
# Keyword arguments are used because PyMySQL 1.0+ declares connect() as
# keyword-only, so the former positional call raises TypeError there.
# NOTE(review): this connection is created at import time, before the worker
# Pool is started, so worker processes may end up sharing one socket --
# confirm whether each worker should open its own connection instead.
connection = pymysql.connect(
    host=host,
    user=user,
    password=password,
    database=db,
    port=port,
)
cursor = connection.cursor()
def get_html(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would let a
            single stalled request block a worker forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of handing their HTML to the parser.
    response.raise_for_status()
    return response.text
def save_to_db(values):
    """Insert one row into the configured table.

    The row is committed when ``cursor.execute`` reports a non-zero row
    count. On any error the transaction is rolled back and the failure is
    printed; the exception is not re-raised, so one bad row does not stop
    the scrape.

    Args:
        values: Sequence of column values, in the table's column order.
    """
    # The table name cannot be bound as a query parameter, so it is still
    # formatted in (it is not defined in this file; presumably it comes from
    # the star import of config). The row values, which originate from
    # scraped pages, are passed separately so the driver escapes them
    # instead of relying on Python's tuple repr being valid SQL.
    placeholders = ', '.join(['%s'] * len(values))
    insert_ = 'INSERT INTO {} VALUES ({})'.format(tabel, placeholders)
    try:
        if cursor.execute(insert_, tuple(values)):
            connection.commit()
    except Exception as exc:
        connection.rollback()
        print('插入失败', values, exc)
def get_info(html):
    """Parse one ladder page and store every player row in the database.

    Looks up the ``<table class="team-size-1">`` element, skips its first
    ``<tr>`` and, for each remaining row, collects the column values and
    hands them to ``save_to_db`` in the order the keys are listed below.

    Args:
        html: HTML text of a ladder page.

    Raises:
        AttributeError: If the table (or an expected tag inside a row) is
            missing from the page.
        IndexError: If a row has fewer cells than expected.
        ValueError: If a numeric cell does not contain an integer.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find('table', class_='team-size-1').find_all('tr')
    for row in rows[1:]:
        # Look the cell lists up once per row instead of once per field.
        numbers = row.find_all('td', class_='number')
        images = row.find_all('td', class_='img')

        # Rows without a league image are stored with league '0'.
        league_img = images[1].find('img', class_='league')
        if league_img is not None:
            # File name of the image minus its last 10 characters
            # (presumably a fixed suffix plus extension -- verify against
            # the site's image URLs).
            league = league_img.get('src').split('/')[-1][:-10]
        else:
            league = '0'

        infomation = {
            'Rank': int(numbers[0].text),
            'Name': row.find('span', class_='name').text,
            'MMR': int(numbers[1].text),
            'Points': int(numbers[2].text),
            'Wins': int(numbers[3].text),
            'Losses': int(numbers[4].text),
            # NOTE(review): always stored as 0 and numbers[5] is never
            # read -- confirm whether that cell holds the played count.
            'Played': 0,
            'WinRate': numbers[6].text,
            'Age': numbers[7].text,
            'Region': images[0].find('img').get('src')[-12:-10],
            'League': league,
            'Tier': int(images[2].text),
            'Race': row.find('img', class_='race').get('src').split('/')[-1][:-10],
        }
        # Dict insertion order defines the column order of the inserted row.
        save_to_db(list(infomation.values()))
def main(offset):
    """Fetch and store the ladder page with index *offset*.

    The page index is multiplied by 100 to form the ``offset`` query
    parameter. Any exception raised while building the URL, downloading
    or parsing is printed and swallowed, so a failing page does not abort
    the rest of the run.

    Args:
        offset: Zero-based page index.
    """
    try:
        first_row = offset * 100
        page_url = 'https://www.rankedftw.com/ladder/lotv/1v1/mmr/?offset={}'.format(first_row)
        page_html = get_html(page_url)
        get_info(page_html)
    except Exception as err:
        print(err)
if __name__ == '__main__':
    # Scrape page indexes 0..3253 with a pool of worker processes.
    # Pool.map blocks until every page has been processed, so the pool can
    # be torn down as soon as the with-block exits.
    try:
        with Pool() as pool_:
            pool_.map(main, range(3254))
    finally:
        # Close the database connection even if the pool raised.
        connection.close()
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用。你还可以使用 @ 来通知其他用户。