可能是抓取的网页被压缩了的缘故,您可以试一下用下面这段代码抓取网页: 参考来源:http://www.cnblogs.com/jixin/... import socketimport urllib2import zlib user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'headers = { 'User-Agent' : user_agent } def Get(url, refer=None): try: req = urllib2.Request(url,headers = headers) req.add_header('Accept-encoding', 'gzip')#默认以gzip压缩的方式得到网页内容 if not (refer is None): req.add_header('Referer', refer) response = urllib2.urlopen(req, timeout=120) html = response.read() gzipped = response.headers.get('Content-Encoding')#查看是否服务器是否支持gzip if gzipped: html = zlib.decompress(html, 16+zlib.MAX_WBITS)#解压缩,得到网页源码 return html except urllib2.HTTPError, e: return e.read() except socket.timeout, e: return '' except socket.error, e: return ''
可能是抓取的网页被压缩了的缘故,您可以试一下用下面这段代码抓取网页:
参考来源:http://www.cnblogs.com/jixin/...
import socket
import urllib2
import zlib
# Browser-like User-Agent string (identifies as MSIE 5.5 on Windows NT).
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
# Default header mapping carrying the User-Agent defined above.
headers = {'User-Agent': user_agent}
def Get(url, refer=None):