#-- conding:utf-8 --
import requests, re
def index_content(url_index, title=0, time=0):
for url in url_index:
index_req = requests.get(url)
index_req.encoding = 'utf-8'
index_content = index_req.text
content_url = re.findall('<div class="text text-no-img">.*?<p class="title">.*?<a href="(.*?)" target="_blank">',index_content,re.S)
title = re.search('<p class="title">.*?<a href=".*?" target="_blank">(.*?)</a>',index_content,re.S).group(1)
time = re.search('<p class="time">(.*?)</p>',index_content,re.S).group(1).replace("年", "-").replace("月", "-").replace("日", "")
print(content_url)
print(title)
print(time)
def content(content_url):
for url in content_url:
content_req = requests.get(url)
content_req.encoding = 'utf-8'
content = content_req.text
return content
url_template = 'http://finance.eastmoney.com/news/cgnjj_{}.html'
url_index = [url_template.format(pages) for pages in range(1, 2)]
index_content(url_index, title=0, time=0)
content(content_url)
报错 content_url
找不到。如果是两个方法的话,怎么调用呢?
或者说,我这两个方法写好了,怎么让它们一起工作...