python爬虫用 beautifulsoup 解析,有时候会出现 list out of range , 但是代码不变情况下,有时候也能运行。输出的列表均为空
import requests
from bs4 import BeautifulSoup
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'}
response=requests.get("https://www.iqiyi.com/ranks1/3/0",headers=headers)
print(response.status_code)
response=response.text
soup= BeautifulSoup(response,"html.parser")
print(soup)
all_titles = soup.find_all("div", attrs={"class": "rvi__tit1"})
print("jil")
def get_title():
try:
all_titles = soup.find_all("div", attrs={"class": "rvi__tit1"})
title = []
for titles in all_titles:
title2 = titles.get_text().strip()
title.append(title2)
return title[0:10]
except IndexError:
pass
def get_heat():
try:
soup = BeautifulSoup(response, "html.parser")
all_heat = soup.find_all("span", attrs={"class": "rvi__index__num"})
heat = []
for heat1 in all_heat:
heat.append(heat1.get_text().strip())
return heat[0:10]
except IndexError:
pass
def get_introduction():
all_introduction = soup.find_all("p", attrs={"class": "rvi__des2"})
introduction = []
for introduction1 in all_introduction:
introduction.append(introduction1.get_text().strip())
return introduction[0:10] # 爬取前10
def list_show():
tplt = "{0:{3}^10}\t{1:{3}^6}\t{2:{3}^20\t{3:{3}^60}}" ## {1:{3}^10} 1表示位置,{3}表示用第3个参数来填充,^表示居中,10表示占10个位置
#print(tplt.format('排名', '剧名', '热度', chr(12288)))
lt = []
for i in range(10):
# 输出前25组数据
tplt1 = "{0:{3}^10}\t{1:{3}^6}\t{2:{3}^20\t{3:{3}^60}}"
lt.append([i +1, get_title()[i], get_heat()[i],get_introduction()[i]])
return lt
if __name__ == '__main__':
print(get_title())
print(get_heat())
print(get_introduction())
list_show()
输出statu_code 是 200, 也输出了soup ,但是就是列表返回不出数据
当get_title()返回的数组长度不够10个,你的程序一定会报错:数组地址越界。
看下面这段逻辑,如果get_title()返回的数组只有5个,此时如果i=7,
get_title()[7]
一定会抛出异常。