Python正则表达式在网页和软件上测试正确,拿到程序里匹配不到结果

#coding=gbk
import re
import urllib
import requests

# headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36"}
# Listing page to scrape. The separators must be real ASCII dots; the
# previous literal used the CJK character "点" in their place, which makes
# the host name unresolvable.
url = "http://www.58pic.com/tupian/xinnian-0-0-1.html"

def craw(url, save_dir="D:/a1/",
         pattern=r'http://pic\.qiantucdn\.com[^"\'\s]+?/format/webp'):
    """Download every image referenced by the page at *url*.

    The page is fetched, every substring matching *pattern* is treated as
    an image URL, and each one is saved as ``<save_dir>/<n>.webp`` where
    ``n`` is the 1-based position of the URL in the page.

    Args:
        url: Address of the HTML page to scan.
        save_dir: Directory the images are written to; created if missing.
        pattern: Regular expression whose *whole match* is an image URL.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        urllib.error.URLError: If the page itself cannot be fetched.
            Failures of individual image downloads are reported and
            skipped instead of aborting the run.
    """
    # Imported here because a bare ``import urllib`` does not guarantee
    # that the ``request`` / ``error`` submodules are loaded.
    import os
    import urllib.error
    import urllib.request

    with urllib.request.urlopen(url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        # Decode the body; str(bytes) would yield the "b'...'" repr with
        # escape sequences instead of the real text.
        html1 = response.read().decode(charset, errors="replace")

    # Use the whole match (group 0): findall() with capturing groups
    # returns tuples of the groups, not the URL itself.
    imagelist = [match.group(0) for match in re.finditer(pattern, html1)]
    print(imagelist)

    os.makedirs(save_dir, exist_ok=True)
    for x, imageurl in enumerate(imagelist, start=1):
        imagename = os.path.join(save_dir, str(x) + ".webp")
        # The matched text already carries its scheme, so nothing is
        # prepended to it.
        print(imageurl)
        try:
            # Pass the current URL, not the whole list of URLs.
            urllib.request.urlretrieve(imageurl, filename=imagename)
        except urllib.error.URLError as e:
            print("download failed:", imageurl, getattr(e, "reason", e))
        else:
            print("saved:", imagename)
 
# Script entry: scrape the listing page stored in the module-level `url`.
craw(url)
阅读 2.6k
2 个回答

给你个建议:这个页面的 DOM 结构很清晰,
可以用 lxml 模块的 XPath 来提取图片链接,不必用正则。

新手上路,请多包涵

应该是对方网站做了反爬虫处理,改了图片的地址,所以原来的正则在程序里匹配不到结果。可以按页面实际返回的地址格式来匹配:

# Browser-like User-Agent sent with every request. It has to be defined
# here: the only other definition in this file is commented out, so using
# `headers` without this assignment raises NameError.
headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36"}

# One shared session, so the same headers (and any cookies) are reused
# across requests.
session = requests.Session()

# Merge into the session's default headers rather than replacing them, so
# the defaults requests supplies are kept.
session.headers.update(headers)
def crawl(url):
    """Fetch *url* through the shared session and print the image URLs found.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        None. The list of matched image URLs is printed to stdout.
    """
    # Use the decoded body: matching a str pattern against the raw bytes
    # of `.content` raises TypeError on Python 3.
    html = session.get(url).text
    # Dots are escaped so they match only a literal "."; the capture group
    # ends at the last "0" before the closing quote of the attribute.
    pat1 = r'(http://pic\.qiantucdn\.com/.+?0)"'
    images_list = re.findall(pat1, html)
    # print() call form: the Python 2 print statement is a syntax error on
    # Python 3.
    print(images_list)

# Script entry: list the image URLs found on the page stored in `url`.
crawl(url)

# ['http://pic.qiantucdn.com/58pic/28/29/87/73R58PICNSW.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/36/60/29J58PICKMe.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/2018/01/29/22/29466c2746dc7858651515e0654709bb.jpeg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/25/19/86R58PICmz5.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/22/98/07m58PICNQj.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/29/02/81e58PIC6U2.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/25/59/46m58PICjni.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/25/72/69u58PICFYr.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/34/76/67X58PIC3RF.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/22/00/01Z58PICWH2.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/34/91/89i58PICQEB.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/28/39/54N58PICp3h.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/36/11/43a58PICX9P.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/32/56/40r58PICxRD.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/23/69/02j58PICkyM.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/32/04/21V58PIC9Sc.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/29/02/28w58PIC8tf.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/22/14/96t58PICSw3.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/24/36/14X58PICdY9.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/28/58/97M58PICJgr.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/31/23/58658PICPrx.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/31/66/58R58PICvjW.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/02/20F58PICQD8.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/34/62/48u58PICtcA.jpg!/fw/300/clip/0x400a0a0', 
'http://pic.qiantucdn.com/58pic/28/32/48/21k58PIC56I.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/29/34/46658PICIFi.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/23/76758PICJGK.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/33/41/66P58PICKZi.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/34/73/20458PICEQw.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/23/21/57G58PIC3WB.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/33/41/24p58PICNmP.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/64/03c58PICQbX.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/36/72/93y58PICEid.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/99/82758PICN8v.JPG!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/18/76c58PIC4CB.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/34/97/62t58PIC3sT.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/69/82S58PICweA.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/35/09/87K58PICM2J.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/36/07/95J58PICjFn.jpg!/fw/300/clip/0x400a0a0', 'http://pic.qiantucdn.com/58pic/28/27/39/58558PICXAz.jpg!/fw/300/clip/0x400a0a0']
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题