'''
程序目的:
1.提取豆瓣Top250电影,按照序号,名称,豆瓣评分给出
'''
import requests
import re
#抓取网页
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        # A timeout keeps a stalled connection from blocking the script forever.
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the "" path.
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # declared in the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing to parse".
        return ""
#分析网页,提取所需信息
def parseHTML(info, html):
    """Extract movie titles and ratings from ``html`` and append them to ``info``.

    Titles are taken from text that follows ``"title">`` and ratings from
    text that follows ``"v:average">``. The n-th title is paired with the
    n-th rating.

    Args:
        info: List that is extended in place with ``[title, rating]`` pairs
            (both strings). It is NOT reset, so results from several pages
            accumulate across calls.
        html: Page source to scan. An empty or ``None`` value adds nothing.

    Returns:
        None. Results are delivered through ``info``.
    """
    if not html:
        return
    # NOTE(review): the title pattern only accepts a leading run of CJK
    # ideographs, so a title that starts with (or contains) digits, Latin
    # letters or punctuation is skipped or cut short, which can shift the
    # title/rating pairing -- confirm against the real page markup.
    titles = re.findall(r'"title">([\u4e00-\u9fa5]+)', html)
    # One or more integer digits so that a value such as 10.0 is not missed.
    ratings = re.findall(r'"v:average">([0-9]+\.[0-9])', html)
    # zip() stops at the shorter list, so a count mismatch cannot raise.
    for title, rating in zip(titles, ratings):
        info.append([title, rating])
def printInfo(info):
    """Print ``info`` as a tab-separated table with a running row number.

    Args:
        info: Iterable of two-element sequences; element 0 goes in the
            title column and element 1 in the rating column.
    """
    row_format = "{:5}\t{:10}\t{:10}"
    header = row_format.format("序号", "电影名称", "豆瓣评分")
    print(header)
    # Row numbers start at 1.
    for position, entry in enumerate(info, start=1):
        line = row_format.format(position, entry[0], entry[1])
        print(line)
def main():
    """Fetch every page of the Douban Top250 list and print the results.

    Each page is requested with a ``start`` query parameter that advances
    by 25 per page (0, 25, ..., 225). A page that fails is skipped so the
    remaining pages are still processed.
    """
    # The "=" is required; without it the query becomes "?start25", which
    # is a different parameter name with no value.
    start_url = "https://movie.douban.com/top250?start="
    page_size = 25
    depth = 10  # 10 pages in total
    infomation = []  # accumulates [title, rating] pairs across pages
    for i in range(depth):
        try:
            url = start_url + str(page_size * i)
            html = getHTMLText(url)
            parseHTML(infomation, html)
        except Exception:
            # Best-effort: one bad page must not abort the whole run.
            continue
    printInfo(infomation)


main()
# Note: in getHTMLText, "r = requsts.get(url)" is missing an "e"; it should be "r = requests.get(url)".