爬取网站地址:http://www.xicidaili.com/nn
通过Python获取IP地址,端口,位置,是否匿名,类型,速度,连接时间,验证时间。
并将这些数据插入到数据库
以下是我下的代码,求各位指点下,谢谢
import requests
from bs4 import BeautifulSoup
import MySQLdb
import MySQLdb.cursors
import sys
reload(sys)
sys.setdefaultencoding('utf8')
file_name = 'proxy.txt'
url = 'http://www.xici.net.co/nn/1'
source_code = requests.get(url)
soup = BeautifulSoup(source_code.content, "lxml")
list_soup = soup.find_all('tr', class_='odd')
ip_list = []
for proxy_info in list_soup:
ip = ' '.join(proxy_info.get_text().split()).encode('utf-8')
#print ip.encode('utf-8')
for times in proxy_info.find_all('div', class_='bar'):
getTime = ' ' + times.get('title').encode('utf-8')
ip += getTime
ip_list.append(ip)
f = open(file_name, 'w')
for i in ip_list:
f.write(i + '\n')
f.close()
conn = MySQLdb.connect(host='localhost', user='root',
passwd='123456', db='python', port = 3306)
cur = conn.cursor()
for j in range(len(ip_list)):
cur.execute('insert into ipList(ipList) values(%s)', ip_list[j])
print 'success connect'
cur.close()
conn.close()
代码没有仔细看,但是很明显的问题是
insert
语句的execute
之后没有commit transcation
解决方法:
在
cur.execute('insert into ipList(ipList) values(%s)', ip_list[j])
的下一行添加