爬虫报错的问题

新手上路,请多包涵

程序一开始运行正常,但运行一段时间后就会出现下面这样的错误:
<urlopen error [Errno 2] No such file or directory>
但是重新启动程序后又恢复正常了。
请问这个问题是什么原因造成的呢?

导入模块

import time
import pandas as pd
import os,shutil

移动文件

def move_file(srcfile, dstfile):
    """Move ``srcfile`` to ``dstfile``, creating the destination directory.

    If ``srcfile`` is not an existing regular file, a message is printed and
    nothing else happens.

    Args:
        srcfile: Path of the file to move.
        dstfile: Full destination path (directory plus file name). A bare
            file name is accepted and resolves against the current working
            directory.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return

    # A bare file name has an empty directory part; os.makedirs('') would
    # raise, so only create the directory when there is one to create.
    fpath = os.path.dirname(dstfile)
    if fpath:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(fpath, exist_ok=True)

    shutil.move(srcfile, dstfile)
    print("move %s -> %s" % (srcfile, dstfile))

import smtplib
from email.header import Header
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication
import os,datetime

def send_email(path):
    """Send one email with every regular file in ``path`` attached.

    All files are attached regardless of extension. Subdirectories are
    skipped. Nothing is sent when there is no file to attach. Failures while
    talking to the SMTP server are printed and swallowed so the caller keeps
    running.

    Args:
        path: Directory whose files are attached to the message.
    """
    # Date stamp used in the subject and the body text.
    today_date = str(datetime.date.today())

    # Sender account and recipients.
    # NOTE(review): the account and its authorization code are hard-coded in
    # source; consider loading them from configuration or the environment.
    user = '570793071@qq.com'           # sender address
    pwd = 'vpbkrmkysvsybcdd'            # mailbox authorization code
    receivers = ['570793071@qq.com']    # recipient addresses

    # Build a multipart message that can carry attachments.
    message = MIMEMultipart()
    message['From'] = user
    message['To'] = ','.join(receivers)
    subject = 'Anomalous' + today_date
    message['Subject'] = Header(subject, 'utf-8').encode()
    # Plain-text body.
    message.attach(MIMEText('提醒' + today_date + ', 请注意查收附件!\n有问题随时沟通,谢谢!', 'plain', 'utf-8'))

    attached = 0
    for file_name in os.listdir(path):
        print(file_name)

        file_path = os.path.join(path, file_name)
        print(file_path)

        # open() on a directory raises, so only regular files are attached.
        if not os.path.isfile(file_path):
            continue

        # Read through a context manager so the handle is closed promptly.
        with open(file_path, 'rb') as attachment:
            part = MIMEApplication(attachment.read())
        part.add_header('Content-Disposition', 'attachment', filename=file_name)
        message.attach(part)
        attached += 1

    if not attached:
        return

    # Send once, after all attachments are in place. Sending inside the loop
    # would deliver one mail per file, each repeating earlier attachments.
    smtpserver = 'smtp.qq.com'
    smtpport = 25
    try:
        # The context manager closes the connection even if login or send fails.
        with smtplib.SMTP(smtpserver, smtpport, timeout=10) as s:
            s.login(user, pwd)
            s.sendmail(user, receivers, message.as_string())
    except Exception as e:
        # Deliberately best-effort: report the failure and continue.
        print("send email error:" + str(e))

import urllib.request
import json

查询函数

def get_market():
    """Fetch one market snapshot from CoinGecko and append it to ``summa_s``.

    Requests 8 pages of 250 coins each, ordered by market cap descending.
    For every coin that has a price, a volume and a non-zero market cap, and
    whose symbol is not in the exclusion set, a dict with ``symbol``,
    ``price`` and ``turnover_rate`` (total volume / market cap) is recorded.
    The resulting list is appended to the module-level ``summa_s``.

    Network and HTTP errors raised by ``urlopen`` propagate to the caller.
    """
    # Symbols excluded from the scan; a set gives O(1) membership tests.
    stable_coin = {'usdt', 'usdc', 'tusd', 'pax', 'gusd', 'husd', 'busd', 'usdk', 'dusd', 'brl', 'eur', 'gbp'}

    # Per-coin records (symbol, price, turnover rate) for this snapshot.
    summa = []

    for t in range(8):
        # Close each response explicitly: this function runs every cycle of a
        # long-lived loop, and unclosed responses leak sockets over time.
        with urllib.request.urlopen("https://api.coingecko.com/api/v3/coins/markets?vs_currency=usd&order=market_cap_desc&per_page=250&page=" + str(t + 1) + "&sparkline=false") as response:
            data = json.loads(response.read().decode())

        for c in data:
            if c['current_price'] is None or c['total_volume'] is None:
                continue
            if c['symbol'] in stable_coin:
                continue
            # A missing or zero market cap would make the division fail.
            market_cap = c['market_cap']
            if not market_cap:
                continue

            turnover_rate = c['total_volume'] / market_cap
            summa.append({'symbol': c['symbol'], 'price': c['current_price'], 'turnover_rate': turnover_rate})

    summa_s.append(summa)

调用扫描后找出量比异常的币种

def scan_and_alert():
    """Take a market snapshot, compare it with the previous one, and alert.

    Calls ``get_market()`` to append a snapshot to ``summa_s`` and keeps only
    the two most recent snapshots. When two are available, each coin's price
    and turnover rate are compared; a coin is reported when its turnover rate
    grew by a factor of at least 1.5 or its price by a factor of at least
    1.03. Reported coins are written to a timestamped CSV, emailed via
    ``send_email`` and then moved to the "sended" directory via
    ``move_file``. Finally the function sleeps for 160 seconds.
    """
    seconds = time.time()

    get_market()

    # Keep only the two most recent snapshots (oldest first).
    del summa_s[:-2]

    # Coins whose price or turnover ratio crossed a threshold.
    anomalous = []

    if len(summa_s) == 2:
        previous, current = summa_s

        # Match coins by symbol, not by list position: rankings and filtering
        # can differ between scans, and positional pairing would then compare
        # unrelated coins. If a symbol repeats within a snapshot, only its
        # first entry is used.
        previous_by_symbol = {}
        for entry in previous:
            previous_by_symbol.setdefault(entry['symbol'], entry)

        seen = set()
        for n in current:
            symbol = n['symbol']
            if symbol in seen:
                continue
            seen.add(symbol)

            m = previous_by_symbol.get(symbol)
            if m is None:
                continue
            # Both previous values are divisors; skip non-positive ones.
            if not m['turnover_rate'] > 0 or not m['price'] > 0:
                continue

            price_ratio = n['price'] / m['price']
            turnover_ratio = n['turnover_rate'] / m['turnover_rate']
            if turnover_ratio >= 1.5 or price_ratio >= 1.03:
                anomalous.append({'symbol': symbol, 'price_ratio': price_ratio, 'turnover_ratio': turnover_ratio})

    if anomalous:
        dird = 'D://crypto_currency_scan_result//'
        path = 'D://crypto_currency_scan_result//' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(seconds))
        srcfile = path + '.csv'
        dstfile = path.replace('crypto_currency_scan_result', 'crypto_currency_scan_result_sended') + '.csv'

        # to_csv does not create missing directories.
        os.makedirs(dird, exist_ok=True)
        pd.DataFrame(anomalous).to_csv(srcfile, index=False)
        send_email(dird)
        # Move the report out of the scan directory so the next mail does not
        # attach it again.
        move_file(srcfile, dstfile)

    # Pause before the next scan. The original comment describes the cycle as
    # three minutes; the sleep itself is 160 seconds.
    time.sleep(160)

存放两次查询结果

# Rolling history of market snapshots: get_market() appends to it and
# scan_and_alert() trims it to the two most recent entries.
summa_s = []

# Run the scan loop only when executed as a script, so importing this module
# does not start an endless loop.
if __name__ == "__main__":
    while True:
        scan_and_alert()
阅读 2.1k
1 个回答

原因很简单,因为请求的 url 不完整。

例如 https://segmentfault.com/q/1010000040954401 变成了 /q/1010000040954401,前面的 https://segmentfault.com 丢失了。这种情况下它就不会被当作一个 URL,而是被当作一个本地路径,所以才会报 No such file or directory。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
1 篇内容引用