v0.3.9 单步调试可以运行,也能存到mongodb,但是使用running无法获取到结果

单步调试的时候系统是正常的。但是保存之后,在控制面板把项目状态设置成 running 并点击运行就会报错;即使运行成功也没有返回结果,数据也无法存到 mongodb。另外,crawl_config 好像也没有起到作用。代码如下:

from pyspider.libs.base_handler import *
from pymongo import MongoClient
import time

class Handler(BaseHandler):
    """pyspider handler that scrapes one-way fare tables from et.airchina.com.cn.

    For every ordered pair of distinct airports in ``self.airports`` a
    JS-rendered search page is fetched, each result row is turned into a
    ticket dict, and the tickets are written to the ``tickets`` collection
    of the ``pyspider_resultdb`` MongoDB database.
    """

    # Passed to pyspider as project-wide crawl defaults (request headers).
    crawl_config = {
        "headers": {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
            "Referer": "http://et.airchina.com.cn/www/jsp/aals/flightSearch.jsp",
        }
    }

    # (cabin display name, suffix of the ".colCost_<suffix>" price column).
    _CABIN_COLUMNS = (
        ("头等", "DF1"),
        ("头等折扣", "DF2"),
        ("公务", "DB1"),
        ("公务折扣", "DB2"),
        ("超经全价", "DP1"),
        ("超经折扣", "DP2"),
        ("高端全价", "DE1"),
        ("商旅知音", "DE2"),
        ("折扣经济", "DE3"),
        ("特价经济", "DE4"),
        ("超值特价", "DE5"),
    )

    def __init__(self):
        """Set up the search URL template, airport list and MongoDB handle."""
        super(Handler, self).__init__()
        # Placeholders, in order: origin, destination, day, month, year.
        # Literal percent signs in the query string are escaped as "%%".
        self.base_url = "http://et.airchina.com.cn/InternetBooking/AirLowFareSearchExternal.do?tripType=OW&searchType=FARE&flexibleSearch=false&directFlightsOnly=false&fareOptions=1.FAR.X&outboundOption.originLocationCode=%s&outboundOption.destinationLocationCode=%s&outboundOption.departureDay=%s&outboundOption.departureMonth=%s&outboundOption.departureYear=%s&outboundOption.departureTime=NA&guestTypes%%5B0%%5D.type=ADT&guestTypes%%5B0%%5D.amount=1&guestTypes%%5B1%%5D.type=CNN&guestTypes%%5B1%%5D.amount=0&guestTypes%%5B2%%5D.type=INF&guestTypes%%5B2%%5D.amount=0&pos=AIRCHINA_CN&lang=zh_CN"
        self.airports = ['PEK', 'PVG', 'CKG']
        self.client = MongoClient('mongodb://10.131.0.201:27017')
        self.db = self.client.pyspider_resultdb

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule one fare search for every ordered pair of distinct airports."""
        # A plain loop: the calls are made for their side effect only.
        for origin in self.airports:
            for destination in self.airports:
                if origin != destination:
                    self.search(origin, destination)

    def search(self, oc, dc, departdate='2017-08-31'):
        """Queue the fare search page for one route.

        Args:
            oc: origin airport code.
            dc: destination airport code.
            departdate: departure date as ``YYYY-MM-DD``. It fills the URL's
                day/month/year fields and is forwarded to the callback via
                ``save`` so both always agree.
        """
        year, month, day = departdate.split('-')
        print('%s - %s' % (oc, dc))
        # itag changes on every call, so a previously crawled URL is fetched again.
        # NOTE(review): pyspider documents `timeout` in seconds, so 3000 is
        # 50 minutes -- confirm this is intended.
        self.crawl(
            self.base_url % (oc, dc, day, month, year),
            callback=self.index_page,
            fetch_type='js',
            connect_timeout=120,
            timeout=3000,
            save={'departdate': departdate},
            itag=time.ctime(),
        )

    @config(age=24 * 60 * 60)
    def index_page(self, response):
        """Parse the fare table of a search result page.

        Args:
            response: fetched page; ``response.save["departdate"]`` carries
                the departure date that was searched.

        Returns:
            A list with one ticket dict per result row (rows carrying the
            ``combineRows`` class are skipped); an empty list when the
            response is not OK.
        """
        if not response.ok:
            # Do not sleep here: a long sleep inside a callback blocks the
            # process running it. Per the pyspider docs, failed fetches are
            # normally not passed to callbacks unless
            # @catch_status_code_error is used, so this guard is defensive.
            print("the request has been rejected.")
            return []

        departdate = response.save["departdate"]
        tickets = []
        for tr in response.doc(".resultWithFF5>tbody>tr").items():
            if tr.hasClass("combineRows"):
                continue
            cabins = [
                {"name": name, "price": tr.find(".colCost_%s label" % column).text()}
                for name, column in self._CABIN_COLUMNS
            ]
            tickets.append({
                "flightno": tr.find(".colFlight a").text(),
                "departtime": tr.find(".colDepart div").text(),
                "arrivetime": tr.find(".colArrive div").text(),
                "airports": tr.find(".colAirports .simpleToolTip").text(),
                "airplane": tr.find(".colType a").text(),
                "departdate": departdate,
                "crawltime": time.time(),
                "cabins": cabins,
            })
        return tickets

    def on_result(self, result):
        """Forward a callback result to pyspider and store it in MongoDB.

        Args:
            result: value returned by a callback; falsy values (for example
                the ``None`` returned by ``on_start``) are ignored.
        """
        if not result:
            print("there is no result")
            return

        print(result)

        # Keep pyspider's default handling so results still reach its own
        # result pipeline; overriding without this call drops them.
        super(Handler, self).on_result(result)

        # insert_many adds an "_id" to the documents it is given, so insert
        # shallow copies and leave the objects handed to pyspider untouched.
        self.db.tickets.insert_many([dict(ticket) for ticket in result])
           
阅读 2.1k
1 个回答

pip 上的版本不是最新的,直接拉取 GitHub 上的代码安装即可。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进