写入txt文件时出错

# -*- coding: utf-8 -*-

import requests,re

# Listing-page URLs: pages 1 through 25 of the cgnjj news index.
url_list_all = list(map('http://finance.eastmoney.com/news/cgnjj_{}.html'.format, range(1, 26)))


def content(url):
    """Download one article page and extract its fields with regexes.

    Args:
        url: Address of the article page to fetch.

    Returns:
        A dict with the keys 'title:', 'time:', 'source:' and 'content:'
        holding the text captured from the page.

    Raises:
        AttributeError: If one of the expected markers is missing from the
            page, because ``re.search`` then returns ``None``.
    """
    req = requests.get(url, timeout=500)
    print('content_ulr:', url)
    # Decode the response body as UTF-8 before reading it as text.
    req.encoding = 'utf-8'
    content_source = req.text
    title = re.search('<h1>(.*?)</h1>', content_source, re.S).group(1)
    source_time = re.search('<div class="time">(.*?)</div>', content_source, re.S).group(1)
    source = re.search('<span>来源:</span>(.*?)</div>', content_source, re.S).group(1)
    content_body = re.search('<!--文章主体-->(.*?)<!--责任编辑-->', content_source, re.S).group(1)
    content_content = {'title:': title, 'time:': source_time, 'source:': source, 'content:': content_body}
    print(content_content)
    # The dict is local to this function, so it has to be returned for any
    # caller to be able to use it.
    return content_content

def file_save(content, path='C:\\Users\\Administrator\\Desktop\\east.txt'):
    """Append one record, followed by a newline, to a text file.

    Args:
        content: The record to store. Strings are written unchanged; any
            other object (such as a dict) is converted with ``str()`` first,
            because ``write()`` only accepts text.
        path: Destination file. Defaults to the original hard-coded location.
    """
    text = content if isinstance(content, str) else str(content)
    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding. The ``with`` block closes the file on exit.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(text + '\n')
# Crawl every listing page, follow each article link found on it, and append
# the parsed article to the output file.
for url in url_list_all:
    print('url_list:', url)
    req = requests.get(url)
    req.encoding = 'utf-8'
    list_content = req.text
    content_url = re.findall('<p class="title">.*?<a href="(.*?)" target="_blank">', list_content, re.S)
    # A distinct loop variable keeps the listing-page `url` from being
    # overwritten by the article links.
    for article_url in content_url:
        # `content_content` only exists inside content(); the parsed dict is
        # reachable here solely through the call's return value.
        article = content(article_url)
        if article is not None:
            # Serialise to text before saving, since file writes need a string.
            file_save(str(article))

以上,代码比较混乱...如果只是正常输出的话,没问题,但是调用函数保存的时候就不行。试了几种方法,报错类型大概有三种:1、字典不能和字符串相加 2、参数必须是字符串 3、找不到content_content。请帮忙看下。

阅读 2.8k
1 个回答

你的代码有几个地方有问题:content() 没有 return,content_content 只是函数内部的局部变量,函数外访问不到;另外 f.write() 只接受字符串,字典要先用 str() 转换。按照下面的代码再试试:

# -*- coding: utf-8 -*-
import requests
import re
import json


# Only pages 1-2 are listed here so that a test run finishes faster.
url_list_all = list(map('http://finance.eastmoney.com/news/cgnjj_{}.html'.format, range(1, 3)))


def content(url):
    """Fetch an article page and return its regex-extracted fields.

    Args:
        url: Address of the article page to download.

    Returns:
        A dict keyed by 'title:', 'time:', 'source:' and 'content:' with the
        first captured group of each corresponding pattern.
    """
    response = requests.get(url, timeout=1000)
    print('content_ulr:', url)
    response.encoding = 'utf-8'
    html = response.text

    def first_group(pattern):
        # re.S lets '.' span newlines, so a capture may cover several lines.
        return re.search(pattern, html, re.S).group(1)

    # Dict entries are evaluated top to bottom, i.e. title, time, source,
    # then body.
    return {
        'title:': first_group('<h1>(.*?)</h1>'),
        'time:': first_group('<div class="time">(.*?)</div>'),
        'source:': first_group('<span>来源:</span>(.*?)</div>'),
        'content:': first_group('<!--文章主体-->(.*?)<!--责任编辑-->'),
    }


def file_save(content, path='./east.txt'):
    """Append the string form of one record to a text file, one per line.

    Args:
        content: The record to store; it is converted with ``str()``.
        path: Destination file. Defaults to ``./east.txt``.
    """
    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding.
    with open(path, 'a', encoding='utf-8') as f:
        # The trailing newline keeps successive records from running together.
        f.write(str(content) + '\n')

# For each listing page: fetch it, collect the article links, then download
# and store every linked article.
for url in url_list_all:
    listing = requests.get(url)
    listing.encoding = 'utf-8'
    article_links = re.findall(
        '<p class="title">.*?<a href="(.*?)" target="_blank">',
        listing.text,
        re.S,
    )
    for link in article_links:
        file_save(content(link))
        
    
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题