for link in link_list:
yield Request(url=link,callback=self.get_title(),dont_filter=True,meta={'title_list':title_list})
# time
time_list = re.findall('},"published_at":"(.*?)",', response.text, re.S)
for i in range(0, len(time_list)):
print(time_list[i] + "\n")
self._requests_(title_list,link_list,time_list,'36kr')
def get_title(self, response):
    """Extract the page title from ``response`` and append it to the shared list.

    The list is read from ``response.meta['title_list']`` and mutated in
    place; nothing is returned.

    * If the URL contains ``"video"``, the title is the first regex match of
      the ``template_title`` field in the response text.
    * If the URL contains ``"html"``, the title is the first text node of
      ``//head/title``.

    Both checks are independent, so a URL containing both substrings appends
    two entries. The ``"_36氪"`` suffix is stripped from every title.

    When no title can be found the list is left unchanged and a warning is
    logged, instead of raising ``IndexError`` on the empty result.
    """
    # Function-scope import: only part of the file is visible here, so the
    # module-level imports cannot be safely extended.
    import logging

    log = logging.getLogger(__name__)
    title_list = response.meta['title_list']

    if "video" in response.url:
        matches = re.findall(
            r'"small_image","template_title":"(.*?)",', response.text, re.S
        )
        if matches:
            title_list.append(matches[0].replace("_36氪", ""))
        else:
            log.warning("no template_title found in %s", response.url)

    if 'html' in response.url:
        nodes = etree.HTML(response.text).xpath('//head/title/text()')
        if nodes:
            title_list.append(nodes[0].replace("_36氪", ""))
        else:
            log.warning("no <title> text found in %s", response.url)
def _requests_(self, title_list, link_list, time_list, *args):
for num in range(0, len(time_list)):
data = self.http.set_post().http_send("/spider/news/save-source",
{"title": title_list[num], "publishTime": time_list[num],
"link": link_list[num], "source": args[0]})
time.sleep(2)
print(data)
我在 for link in link_list 循环里为每个链接回调了函数 get_title，但现在的问题是：如何调用 _requests_ 这个特殊的接口，并把 time_list、title_list、link_list 三个列表传递给它？