# -*- coding: utf-8 -*-
import requests,re
# Addresses of listing pages 1 through 25, built by filling the page number
# into the URL template.
url_list_all = list(map('http://finance.eastmoney.com/news/cgnjj_{}.html'.format, range(1, 26)))
def content(url):
    """Fetch one article page and pull out its main fields.

    Args:
        url: Address of the article page to download.

    Returns:
        dict: The captured fragments under the keys 'title:', 'time:',
        'source:' and 'content:'. Each value is the raw text matched between
        the surrounding tags/markers; no further clean-up is applied.

    Raises:
        AttributeError: If any expected fragment is absent from the page,
            because re.search() then yields None and .group() cannot be
            called on it.
    """
    resp = requests.get(url, timeout=500)
    print('content_ulr:', url)
    # Set the encoding before reading .text so the body is decoded as utf-8.
    resp.encoding = 'utf-8'
    page = resp.text

    # re.S lets '.' span line breaks, since each fragment can cover several lines.
    title = re.search('<h1>(.*?)</h1>', page, re.S).group(1)
    source_time = re.search('<div class="time">(.*?)</div>', page, re.S).group(1)
    source = re.search('<span>来源:</span>(.*?)</div>', page, re.S).group(1)
    # NOTE: the abstract (<div class="abstract">) is not extracted.
    content_body = re.search('<!--文章主体-->(.*?)<!--责任编辑-->', page, re.S).group(1)

    content_content = {
        'title:': title,
        'time:': source_time,
        'source:': source,
        'content:': content_body,
    }
    print(content_content)
    # The dict is local to this function, so it has to be returned for the
    # caller to be able to use it.
    return content_content
def file_save(content, path='C:\\Users\\Administrator\\Desktop\\east.txt'):
    """Append one record to a text file, one record per line.

    Args:
        content: The record to store. A str is written unchanged; any other
            object (for example a dict) is converted with str() first, so
            passing a dict no longer fails on ``content + '\\n'``.
        path: File to append to. Defaults to the location that was previously
            hard-coded, so existing one-argument calls behave as before.
    """
    text = content if isinstance(content, str) else str(content)
    # The encoding is explicit so the written text does not depend on the
    # platform's default codec. The with-block closes the file on exit, so no
    # separate close() call is needed.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text + '\n')
# Walk every listing page, then fetch and save each article it links to.
for list_url in url_list_all:
    print('url_list:', list_url)
    # Same timeout as the article requests, so a stalled listing page cannot
    # block the crawl indefinitely.
    req = requests.get(list_url, timeout=500)
    req.encoding = 'utf-8'
    list_content = req.text
    # Each article link sits in an <a> inside <p class="title">.
    content_url = re.findall('<p class="title">.*?<a href="(.*?)" target="_blank">', list_content, re.S)
    # A distinct loop variable keeps the listing URL from being overwritten.
    for article_url in content_url:
        # Use the value content() returns; `content_content` is a name local
        # to content() and does not exist at this level.
        article = content(article_url)
        if article is None:
            # Nothing came back for this article, so there is nothing to save.
            continue
        # Pass text to file_save(): str() leaves a string unchanged and turns
        # a dict into its one-line textual form.
        file_save(str(article))
以上,代码比较混乱...如果只是正常输出的话,没问题,但是调用函数保存的时候就不行。试了几种方法,报错类型大概有三种:1、字典不能和字符串相加;2、参数必须是字符串;3、找不到 content_content。请帮忙看一下。
你的代码有几个地方有问题,按照下面的代码再试试: