单步调试时一切正常;但保存之后,在控制面板把项目状态设置成 RUNNING 并点击 run,就会报错。即使运行成功也没有返回结果,数据无法存到 MongoDB。另外,crawl_config 好像也没有起作用。代码如下:
from pyspider.libs.base_handler import *
from pymongo import MongoClient
import time
class Handler(BaseHandler):
    """pyspider handler that crawls Air China one-way low-fare search pages.

    ``on_start`` schedules one search request for every ordered pair of
    distinct airports in ``self.airports``.  ``index_page`` turns each fare
    row of the rendered result table into a ticket dict, and ``on_result``
    writes those dicts to the ``tickets`` collection of the
    ``pyspider_resultdb`` MongoDB database.
    """

    # Project-wide request settings: browser-like headers for every request.
    crawl_config = {
        "headers": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
            "Referer": "http://et.airchina.com.cn/www/jsp/aals/flightSearch.jsp",
        }
    }

    # (cabin display name, suffix of the ``colCost_*`` CSS class holding its
    # price), in the order the cabins are stored on each ticket.
    _CABIN_COLUMNS = (
        ("头等", "DF1"),
        ("头等折扣", "DF2"),
        ("公务", "DB1"),
        ("公务折扣", "DB2"),
        ("超经全价", "DP1"),
        ("超经折扣", "DP2"),
        ("高端全价", "DE1"),
        ("商旅知音", "DE2"),
        ("折扣经济", "DE3"),
        ("特价经济", "DE4"),
        ("超值特价", "DE5"),
    )

    def __init__(self):
        """Set up the search URL template, airport list and MongoDB handle."""
        # Placeholders, in order: origin code, destination code, departure
        # day, departure month, departure year.  ``%%`` is a literal percent
        # sign (URL-encoded brackets) that survives the ``%`` formatting.
        self.base_url = "http://et.airchina.com.cn/InternetBooking/AirLowFareSearchExternal.do?tripType=OW&searchType=FARE&flexibleSearch=false&directFlightsOnly=false&fareOptions=1.FAR.X&outboundOption.originLocationCode=%s&outboundOption.destinationLocationCode=%s&outboundOption.departureDay=%s&outboundOption.departureMonth=%s&outboundOption.departureYear=%s&outboundOption.departureTime=NA&guestTypes%%5B0%%5D.type=ADT&guestTypes%%5B0%%5D.amount=1&guestTypes%%5B1%%5D.type=CNN&guestTypes%%5B1%%5D.amount=0&guestTypes%%5B2%%5D.type=INF&guestTypes%%5B2%%5D.amount=0&pos=AIRCHINA_CN&lang=zh_CN"
        self.airports = ['PEK', 'PVG', 'CKG']
        self.client = MongoClient('mongodb://10.131.0.201:27017')
        self.db = self.client.pyspider_resultdb

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule a fare search for every ordered pair of distinct airports."""
        # Plain loops instead of a list comprehension: the calls are made
        # for their side effect only, no list is needed.
        for origin in self.airports:
            for destination in self.airports:
                if origin != destination:
                    self.search(origin, destination)

    def search(self, oc, dc, departdate='2017-08-31'):
        """Queue the fare-search page for one route and departure date.

        Args:
            oc: origin airport code.
            dc: destination airport code.
            departdate: departure date as ``YYYY-MM-DD``.  It is used both
                to fill the URL and as the ``departdate`` value handed to
                ``index_page`` through ``save``, so the two cannot drift.
        """
        print("%s - %s" % (oc, dc))
        year, month, day = departdate.split('-')
        # itag changes on every call, so each scheduling run carries a new
        # tag value.
        # NOTE(review): pyspider documents timeout/connect_timeout in
        # seconds, so timeout=3000 is 50 minutes - confirm this is intended.
        self.crawl(
            self.base_url % (oc, dc, day, month, year),
            callback=self.index_page,
            fetch_type='js',
            connect_timeout=120,
            timeout=3000,
            save={'departdate': departdate},
            itag=time.ctime(),
        )

    @config(age=24 * 60 * 60)
    def index_page(self, response):
        """Parse one search-result page into a list of ticket dicts.

        Returns:
            A list with one dict per fare row (flight number, times,
            airports, aircraft, departure date, crawl timestamp and the
            price text of every cabin column); an empty list when the
            response is not OK.
        """
        if not response.ok:
            # The original slept 600 s here, which blocks the processor
            # running this callback; report and return instead.
            # NOTE(review): pyspider appears to raise on failed fetches
            # before invoking a callback unless it is decorated with
            # @catch_status_code_error, so this guard may never trigger -
            # confirm.
            print("the request has been rejected.")
            return []

        tickets = []
        for tr in response.doc(".resultWithFF5>tbody>tr").items():
            # Rows carrying the "combineRows" class are skipped (the
            # original tested this same class twice).
            if tr.hasClass("combineRows"):
                continue
            tickets.append({
                "flightno": tr.find(".colFlight a").text(),
                "departtime": tr.find(".colDepart div").text(),
                "arrivetime": tr.find(".colArrive div").text(),
                "airports": tr.find(".colAirports .simpleToolTip").text(),
                "airplane": tr.find(".colType a").text(),
                "departdate": response.save["departdate"],
                "crawltime": time.time(),
                "cabins": [
                    {
                        "name": cabin_name,
                        "price": tr.find(".colCost_%s label" % column).text(),
                    }
                    for cabin_name, column in self._CABIN_COLUMNS
                ],
            })
        return tickets

    def on_result(self, result):
        """Forward a callback result to pyspider and store it in MongoDB.

        Args:
            result: value returned by a callback; expected to be the list
                of ticket dicts produced by ``index_page``.  Falsy results
                (e.g. the ``None`` returned by ``on_start``) are ignored.
        """
        if not result:
            print("there is no result")
            return
        print(result)
        # Keep pyspider's own result handling working; without this call the
        # override silently replaces it and nothing shows up in the results
        # view.
        super(Handler, self).on_result(result)
        # insert_many adds an ``_id`` key to the documents it is given, so
        # insert shallow copies and leave the forwarded result untouched.
        self.db.tickets.insert_many([dict(ticket) for ticket in result])
pip 上的 pyspider 不是最新版本,请直接拉取 GitHub 上的代码安装。