Python 爬取图片时返回 404

发布于
2017-12-03

更新于
2017-12-03

import os.path
from re import findall
from urllib.parse import urljoin
from urllib.request import Request
from urllib.request import urlopen




# Scrape one listing page and save every image it references into `path`
# as 0.jpg, 1.jpg, ... (numbered by position on the page).

# Destination folder for the downloaded images.
path = 'D:/python/'
# Browser-like User-Agent sent with every request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
# Listing page whose <img> tags are scraped.
url = 'http://www.27270.com/tag/234.html'

# `[^"]+` keeps the capture inside one src attribute. The previous lazy
# `.+?` could start at an earlier src=" on the same line (e.g. a <script>)
# and run across tags until the first `" width`, producing a garbage URL.
pattern = r'src="([^"]+)" width'


def extract_image_urls(content, base_url):
    """Return absolute image URLs found in *content*.

    Args:
        content: HTML text of the listing page.
        base_url: URL the HTML was fetched from; relative and
            protocol-relative ``src`` values are resolved against it.

    Returns:
        A list of absolute URLs in document order (empty if none match).
    """
    return [urljoin(base_url, src.strip()) for src in findall(pattern, content)]


def fetch(target, referer=None, timeout=30):
    """Download *target* and return the raw response body as bytes.

    Args:
        target: Absolute URL to request.
        referer: Optional value for the ``Referer`` header.
        timeout: Seconds to wait before giving up on the connection.

    Raises:
        OSError: on HTTP errors (``HTTPError``/``URLError`` are subclasses)
            and on socket timeouts.
    """
    request_headers = dict(headers)  # copy so the shared dict is not mutated
    if referer:
        request_headers['Referer'] = referer
    req = Request(url=target, headers=request_headers)
    with urlopen(req, timeout=timeout) as fp:
        return fp.read()


def main():
    """Fetch the listing page and save each referenced image to `path`."""
    os.makedirs(path, exist_ok=True)

    # The page is decoded as GBK as before; undecodable bytes are replaced
    # instead of aborting the whole run.
    content = fetch(url).decode('gbk', errors='replace')

    for index, item in enumerate(extract_image_urls(content, url)):
        try:
            # NOTE(review): the listing page is sent as Referer on the
            # assumption that the image host rejects hot-linked requests
            # (a common cause of 404/403 on image downloads) -- confirm
            # against the site.
            data = fetch(item, referer=url)
        except OSError as err:
            # One broken image must not stop the remaining downloads.
            print('skipped {}: {}'.format(item, err))
            continue
        with open(os.path.join(path, str(index) + '.jpg'), 'wb') as f:
            f.write(data)

    print('完成')


if __name__ == '__main__':
    main()

图片描述