# import sys
# print sys.getdefaultencoding()
import urllib
import re
def getHtml(url):
    """Download the resource at *url* and return its body.

    Args:
        url: Address to fetch, passed unchanged to ``urllib.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields (the full body).
    """
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the connection, even when read() raises.
        page.close()
def getNewslist(html):
    """Extract the news entries from the HTML of a news-list page.

    An entry is the text between ``<i class="icon-angle-right"></i>`` and the
    next closing ``</a>`` tag.

    Args:
        html: Page source to scan.

    Returns:
        A list with one string per entry, in document order, with surrounding
        whitespace removed. The list is empty when nothing matches.
    """
    # Non-greedy (.*?) stops at the FIRST "</a>" after the icon; the greedy
    # form ran on to the last "</a>" of the line and glued entries together.
    reg = r'<i class="icon-angle-right"></i>(.*?)</a>'
    # re.S lets "." cross newlines so an entry whose markup is wrapped over
    # several lines is still captured instead of being silently skipped.
    return [entry.strip() for entry in re.findall(reg, html, re.S)]
if __name__ == '__main__':
    # Script entry point: fetch the news-list page, pull the entries out of
    # it with getNewslist, and print them one per line.
    cslgHighlightnews = 'http://news.cslg.cn/Index/newslist/id/161'
    newslist = getNewslist(getHtml(cslgHighlightnews))
    for item in newslist:
        print(item)
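# Question: why does the regex return different results on every run, and why
# are the matches always incomplete?
# Could it be a multiline-matching problem?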