学习 pyspider 时,参照《把 pyspider 的结果存入自定义的 mysql 数据库中》一文,
设置了本地的 mysql 数据库及连接,但运行时提示:process csair:_on_get_info data:,_on_get_info -> [200] len:12 -> result:None fol:0 msg:0 err:ImportError('No module named mysqldb',)
系统环境:centos7.1
mysql-python (1.2.5),通过 pip 默认安装
python采用默认的2.7.5
希望得到帮助 感谢
文件已存到 pyspider/database/mysql/mysqldb.py 脚本文件如下:
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2015-12-21 09:28:49
# Project: csair
from pyspider.libs.base_handler import *
from pyspider.database.mysql.mysqldb import SQL
class Handler(BaseHandler):
    """pyspider handler for the csair.cn announcement list.

    Crawls the list page, follows its links to article pages, extracts the
    title / release label / content of each article and writes every
    non-empty result to the ``info`` table through ``SQL.replace``.
    """

    crawl_config = {
    }

    @every(minutes=24 * 60)
    def on_start(self):
        """Entry point: queue the announcement list page."""
        self.crawl('http://www.csair.cn/cn/pages/NewsList.aspx?listname=%D6%D8%D2%AA%B9%AB%B8%E6', callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every absolute (``http...``) link on the page as a detail page."""
        for each in response.doc('a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)

    def list_page(self, response):
        """Queue article links and pager links found on a list page.

        No callback in this handler schedules ``list_page`` from outside
        (only ``list_page`` itself does).
        """
        # NOTE(review): 'ul_list7 a' selects by tag name; a class or id
        # selector ('.ul_list7 a' / '#ul_list7 a') was probably intended.
        # Confirm against the page markup before changing it.
        for each in response.doc('ul_list7 a').items():
            self.crawl(each.attr.href, callback=self.detail_page)
        for each in response.doc('.currentnumeric').items():
            self.crawl(each.attr.href, callback=self.list_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract one article.

        Returns a dict whose keys are used as column names by ``on_result``.
        """
        return {
            "url": response.url,
            # '#' added: this is an element id like the two selectors below.
            # Without it the selector matched nothing, the title was always
            # empty and on_result() dropped every record.
            "title": response.doc('#ctl00_ContentPlaceHolder1_DetailBar1_NewsTitle').text(),
            # Key spelling kept as-is: it is passed on as a column name.
            "fabushijain": response.doc('span#ctl00_ContentPlaceHolder1_DetailBar1_ReleaseLabel').text(),
            "content": response.doc('#ctl00_ContentPlaceHolder1_DetailBar1_ContentInfo').text(),
        }

    def on_result(self, result):
        """Store a result in MySQL; results without a title are skipped."""
        # .get() so a dict result lacking 'title' is skipped instead of
        # raising KeyError.
        if not result or not result.get('title'):
            return
        sql = SQL()
        sql.replace('info', **result)
++++++++++++++++++++++++++++++++++++++++++++++
mysqldb.py 文件内容如下:
class SQL(object):
    """Small MySQL helper that writes rows with ``REPLACE INTO``.

    Connection settings are class attributes, and the opened connection is
    stored on the class (``SQL.connection``), so it is shared by all
    instances.

    NOTE: this class uses the ``mysql.connector`` module (MySQL
    Connector/Python). The ``MySQL-python`` / ``MySQLdb`` package does not
    provide that module.
    """

    username = 'pyspider'   # database user name
    password = 'pyspider'   # database password
    database = 'result'     # database name; None connects without selecting one
    host = 'localhost'      # database host address
    connection = ''         # shared connection; '' means "not connected yet"
    # Formerly a class attribute named ``connect``; it was shadowed by the
    # connect() method defined below it, so the flag never had any effect.
    auto_connect = True     # open the connection when an instance is created
    placeholder = '%s'      # parameter marker used in generated statements

    def __init__(self):
        """Connect immediately unless ``auto_connect`` is false."""
        if self.auto_connect:
            self.connect()

    def escape(self, string):
        """Return ``string`` quoted as a MySQL identifier.

        Embedded backticks are doubled so the value cannot terminate the
        quoted identifier early.
        """
        name = '%s' % (string,)
        return '`%s`' % name.replace('`', '``')

    def connect(self):
        """Open a connection from the class-level settings.

        Returns:
            True on success (the connection is stored in ``SQL.connection``),
            False if the driver reported an error; the reason is printed.
        """
        # Imported here so the names used below are in scope for this method.
        import mysql.connector
        from mysql.connector import errorcode

        config = {
            'user': SQL.username,
            'password': SQL.password,
            'host': SQL.host,
        }
        if SQL.database is not None:
            config['database'] = SQL.database

        try:
            SQL.connection = mysql.connector.connect(**config)
        except mysql.connector.Error as err:
            if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
                print("The credentials you provided are not correct.")
            elif err.errno == errorcode.ER_BAD_DB_ERROR:
                print("The database you provided does not exist.")
            else:
                # Two spaces kept: identical to the output of the original
                # Python 2 statement `print "...: " , err`.
                print("Something went wrong:  %s" % (err,))
            return False
        return True

    def replace(self, tablename=None, **values):
        """Run ``REPLACE INTO tablename`` with ``values`` as column=value pairs.

        Column names are escaped as identifiers; values are passed to the
        driver as query parameters.

        Returns:
            True if the statement was executed and committed, False if there
            is no connection or the driver reported an error (printed).
        """
        if SQL.connection == '':
            print("Please connect first")
            return False

        import mysql.connector

        tablename = self.escape(tablename)
        if values:
            # Fix the column order once so names and parameters line up.
            columns = list(values)
            _keys = ", ".join(self.escape(k) for k in columns)
            _values = ", ".join([self.placeholder] * len(columns))
            sql_query = "REPLACE INTO %s (%s) VALUES (%s)" % (tablename, _keys, _values)
            params = [values[k] for k in columns]
        else:
            sql_query = "REPLACE INTO %s DEFAULT VALUES" % tablename
            params = None

        cur = SQL.connection.cursor()
        try:
            if params is None:
                cur.execute(sql_query)
            else:
                cur.execute(sql_query, params)
            SQL.connection.commit()
            return True
        except mysql.connector.Error as err:
            print("An error occured: {}".format(err))
            return False
        finally:
            # The original never closed the cursor.
            cur.close()
有两个思路供参考:
1. 检查一下包目录(pyspider/database/mysql/)下的 __init__.py 是否存在;
2. 检查一下模块文件名和导入语句中的模块名是否一致,特别是这个模块被多个 py 文件导入的情况。