Python urllib2 timeout

自编脚本,目的是从自己的服务器中连续下载两个压缩的CSV文件,解压缩并合并CSV,为数据分析前道工序。

问题出在连续下载部分,怪异的是Windows下工作正常,Linux下第二次下载总是失败。以下是部分代码。

import getopt, os, sys
from datetime import datetime, timedelta, date
import time
import zipfile
import shutil
import logging
import urllib, urllib2
import socket

local = "http://127.0.0.1:8888/static/archive"

def deflat_rename(date, snr):
    zip_fn = "%s.csv.zip"%(snr)
    zfile = zipfile.ZipFile(zip_fn, 'r')
    de_fn = "%s.csv"%(snr)
    fn_date = datetime.strftime(date, "%Y%m%d")
    fn = "%s_%s.csv"%(snr, fn_date)
    if de_fn in zfile.namelist():
        data = zfile.read(de_fn)
        with open(de_fn, 'w+b') as f:
            f.write(data)
        zfile.close()

    if os.path.exists(fn):
        print(fn)
        os.remove(fn)
    else:
        print("not exist")

    print(de_fn)
    os.rename(de_fn,fn)
    
    if os.path.exists(zip_fn):
        os.remove(zip_fn)   

    return fn
    
def download_file(res, snr):
    f = None
    fn = "%s.csv.zip"%(snr)
    try:
        f = urllib2.urlopen(res)
        data = f.read()        
        with open(fn, "wb") as code:
            code.write(data)
        print "download %s done"%(fn)
    except urllib2.URLError, e:
        print e
        return None
    except urllib2.HTTPError, e:
        print e
        return None
    finally:
        if None != f:
            f.close()
    return fn
    
def download(start, stop, snr):
    if 'iZ2573cw0yvZ' == socket.gethostname():
        host = local
    else:
        host = remote
       
    url_date = datetime.strftime(start, "%Y/%m/%d")
    url = "%s/%s/%s.csv.zip"%(host, url_date, snr)
    log("info","url: %s"%(url))

    fn_date = datetime.strftime(start, "%Y%m%d")
    fn = "%s_%s.csv"%(snr, fn_date)

    f = download_file(url, snr)
    if f is None:
        log("info","No such file on %s."%(url))

    f1 = deflat_rename(start, snr)

    url_date = datetime.strftime(stop, "%Y/%m/%d")
    url = "%s/%s/%s.csv.zip"%(remote, url_date, snr)

    fn_date = datetime.strftime(stop, "%Y%m%d")
    fn = "%s_%s.csv"%(snr, fn_date)

    f = download_file(url, snr)
    if f is None:
        log("info","No such file on %s."%(url))

    f2 = deflat_rename(stop, snr)
    return f1, f2 

Linux下错误:

./merge2report.py --date=20161201 --snr=A2H470049
download A2H470049.csv.zip done
A2H470049_20161130.csv
A2H470049.csv
<urlopen error [Errno 110] Connection timed out>
Traceback (most recent call last):
  File "./merge2report.py", line 245, in <module>
    main()
  File "./merge2report.py", line 236, in main
    f1, f2 = download(b, e, snr)
  File "./merge2report.py", line 119, in download
    f2 = deflat_rename(stop, snr)
  File "./merge2report.py", line 66, in deflat_rename
    zfile = zipfile.ZipFile(zip_fn, 'r')
  File "/usr/lib/python2.7/zipfile.py", line 701, in __init__
    self.fp = open(file, modeDict[mode])
IOError: [Errno 2] No such file or directory: 'A2H470049.csv.zip'

Windows下结果

> merge2report.py --date=20161201 --snr=A2H470049
download A2H470049.csv.zip done
A2H470049_20161130.csv
A2H470049.csv
download A2H470049.csv.zip done
A2H470049_20161201.csv
A2H470049.csv

关键在<urlopen error [Errno 110] Connection timed out>这句上。什么导致第二次下载超时?

阅读 2.9k
2 个回答

Python的官方文档说明,如果你没有指定一个timeout,那么它就会使用socket的timeout值.而socket实际上是由操作系统提供的,所以timeout值依赖于操作系统.

有可能是你的这个任务恰好处在windows的socket超时之内而在linux的socket超时之外.

可以打印出下载链接,看看里面是不是有空格或者不正常字符,我遇到过这种情况。因为urlopen不支持带空格路径。

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题