异步下载国旗图片和国家名数据

import asyncio
import os
import sys
import time

import aiohttp
from aiohttp import web

# Country codes of the 20 most populous countries, in descending order of
# population. 'TR' was missing, which left only 19 entries in a list whose
# name promises 20.
POP20_CC = ('CN IN US ID BR PK NG BD RU JP '
            'MX PH VN ET EG DE IR TR CD FR').split()
# Root URL under which each country has its own '<cc>/' folder.
BASE_URL = 'http://flupy.org/data/flags'
# Local directory the downloaded flag images are written to.
DEST_DIR = 'downloads/'

class FetchError(Exception):
    """Raised when the download for one country fails.

    Attributes:
        country_code: the country code whose download failed.
    """

    def __init__(self, country_code):
        # Hand the code to Exception as well, so that str(exc), repr(exc)
        # and exc.args identify the failing country instead of being empty.
        super().__init__(country_code)
        self.country_code = country_code

def save_flag(img, filename):
    """Write the image bytes *img* to *filename* inside DEST_DIR.

    Args:
        img: raw bytes of the image.
        filename: name of the file to create (or overwrite) in DEST_DIR.

    Raises:
        OSError: if the directory or the file cannot be created or written.
    """
    # A fresh checkout has no download directory yet; create it on demand
    # instead of failing with FileNotFoundError on the first flag.
    os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)

async def http_get(session, url):
    """Fetch *url* through *session* and return the response body.

    Args:
        session: the aiohttp client session to issue the request with
            (download_many creates it and passes it down).
        url: address to request.

    Returns:
        The decoded JSON document when the response is JSON (judged by the
        Content-type header or a URL ending in '.json'), otherwise the raw
        body as bytes.

    Raises:
        web.HTTPNotFound: the server answered 404.
        aiohttp.http_exceptions.HttpProcessingError: any other non-200
            status; carries the status code, reason and headers.
    """
    async with session.get(url) as resp:
        if resp.status == 200:
            ctype = resp.headers.get('Content-type', '').lower()
            if 'json' in ctype or url.endswith('.json'):
                # The '.json' fallback exists for servers that send a
                # non-JSON Content-type, so the content-type check done by
                # resp.json() has to be switched off.
                return await resp.json(content_type=None)
            return await resp.read()
        if resp.status == 404:
            raise web.HTTPNotFound()
        # The original referenced an undefined `res` and a misspelt
        # `sstatus`, and `aiohttp.errors` no longer exists in aiohttp >= 2.0.
        raise aiohttp.http_exceptions.HttpProcessingError(
            code=resp.status, message=resp.reason, headers=resp.headers)

async def get_flag(session, cc):
    """Download the flag image for country code *cc* and return it.

    The URL is '<BASE_URL>/<cc>/<cc>.gif' with the code lower-cased; the
    result is whatever http_get returns for that URL.
    """
    flag_url = '{}/{cc}/{cc}.gif'.format(BASE_URL, cc=cc.lower())
    # Network I/O: awaiting hands control back to the event loop meanwhile.
    return await http_get(session, flag_url)

async def get_country(session, cc):
    """Return the 'country' entry of the metadata for country code *cc*.

    The metadata is fetched from '<BASE_URL>/<cc>/metadata.json' with the
    code lower-cased.
    """
    meta_url = '{}/{cc}/metadata.json'.format(BASE_URL, cc=cc.lower())
    # Network I/O: awaiting hands control back to the event loop meanwhile.
    info = await http_get(session, meta_url)
    return info['country']

def show(text):
    """Print *text* followed by '[OK]  ' on the current line, unbuffered."""
    # Flush right away so each country appears as soon as it finishes;
    # otherwise the buffered output would show up all at once at the end.
    print(text + '[OK]  ', end='', flush=True)

async def download_one(session, cc):
    """Download the flag and country name for *cc* and save the image.

    The image is stored as '<cc>-<country>.gif', with spaces in the country
    name replaced by underscores.

    Args:
        session: aiohttp client session shared by all downloads.
        cc: country code to download.

    Returns:
        cc, both when the flag was saved and when the server reported 404.

    Raises:
        FetchError: any download failure other than a 404; the original
            exception is chained as __cause__.
        OSError: the image could not be written to disk.
    """
    try:
        image = await get_flag(session, cc)
        country = await get_country(session, cc)
    except web.HTTPNotFound:
        # A missing flag must not be reported as "[OK]"; say so and carry
        # on, so that the remaining downloads are not aborted.
        print(cc + ' not found  ', end='', flush=True)
        return cc
    except Exception as exc:
        raise FetchError(cc) from exc

    filename = '{}-{}.gif'.format(cc, country.replace(' ', '_'))
    loop = asyncio.get_event_loop()
    # save_flag blocks on disk I/O, so it runs in the loop's default
    # executor (first argument None). Awaiting the future makes write
    # errors visible and guarantees the file exists before "[OK]" is shown
    # and before the caller closes the loop.
    await loop.run_in_executor(None, save_flag, image, filename)
    show(cc)
    return cc

async def download_many(cc_list):
    """Download all countries in *cc_list* concurrently.

    One task per country code (in sorted order) is scheduled on a single
    shared client session; the return value is the number of results
    gathered from those tasks.
    """
    async with aiohttp.ClientSession() as session:
        tasks = []
        for cc in sorted(cc_list):
            # ensure_future schedules the coroutine as a task immediately.
            tasks.append(asyncio.ensure_future(download_one(session, cc)))
        # gather runs the tasks concurrently and returns their results as
        # one list, in the order the tasks were passed in.
        results = await asyncio.gather(*tasks)
    return len(results)

def main():
    """Download the flags of all POP20_CC countries and print a summary.

    Prints the number of results and the elapsed wall-clock time.
    """
    t0 = time.time()
    # Create the loop explicitly: relying on get_event_loop() to create one
    # implicitly is deprecated in recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        count = loop.run_until_complete(download_many(POP20_CC))
    finally:
        # Close the loop even when a download raised, so it is not leaked.
        loop.close()
    elapsed = time.time() - t0
    msg = '\n{} flags download in {:.2f}s'
    print(msg.format(count, elapsed))

# Start the downloads only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()

#def a(*x):
    #print(x)
#a([1,2]) --> ([1,2],)
#a(*[1,2]) --> (1,2)
#*号可以将列表或元组里的元素解包,每个元素作为单独的参数传入


其实,异步库依赖于低层线程(直至内核级线程),但是这些库的用户无需创建线程,也无需知道用到了基础设施中的低层线程。在应用中,我们只需确保没有阻塞的代码,事件循环会在背后处理并发。异步系统能避免用户线程级的开销,这是它能比多线程系统管理更多并发连接的原因。

【注1】The loop.run_in_executor() method can be used with a concurrent.futures.ThreadPoolExecutor to execute blocking code in a different OS thread without blocking the OS thread that the event loop runs in.


coolxc
27 声望1 粉丝

ooooooooooooooooooooolxc