这几天想用selenium爬网页信息,简单写了一个但是总报错,找不到问题原因,各位帮我看看呗,大家可以直接执行我下面的代码,看看报错信息。
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
import csv
import time
#selenium优化选项部分
fox_options = Options()
fox_options.add_argument('--headless')
fox_options.add_argument('--disable-gpu')
fox_options.add_argument('blink-settings=imagesEnabled=false')
fox_options.add_argument('log-level=3')
# 如果firefox没有安装在默认位置,就要手动指定位置
location = 'C:/Firefox/firefox.exe'
driver = webdriver.Firefox(options=fox_options,firefox_binary=location)
# 添加newline可以避免一行之后的空格,这样需要在python3环境下运行
csvfile = open(r'G:\che168\R0610.csv', 'w', newline='', encoding='gb18030')
# 指定逗号作为分隔符,并且指定quote方式为引用。这意味着读的时候都认为内容是被默认引用符(")包围的
writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_ALL)
keys = ['商家编号','车辆编号','车型','车龄','表显里程','售价','新车价格','上牌时间','折旧率','平均每年贬值','变速箱','排放标准', '排量','过户次数','发布日期','过户次数','所在地','发布日期','发动机','颜色','燃油标号', '驱动方式', '公司名称', '公司地址']
writer.writerow(keys)
#确定车型的总页数
driver.get("https://www.che168.com/china/benchi/benchirji/#pvareaid=108403#seriesZong")
C_page =driver.find_element_by_xpath('/html/body/div[12]/div[2]/a[8]').text
print(C_page)
for page in range(1, int(C_page)):
#https://www.che168.com/china/benchi/benchirji/a0_0msdgscncgpi1ltocsp2exx0/?pvareaid=102179#currengpostion
url = "http://www.che168.com/china/benchi/benchirji/a0_0msdgscncgpi1ltocsp" + str(
page) + "exx0/?pvareaid=102179#currengpostion"
print(url)
driver.get(url)
time.sleep(3)
for car_link in driver.find_elements_by_xpath('//*[@class="viewlist_ul"]//a'):
print(car_link.get_attribute('href'))
time.sleep(3)
try:
driver.get(car_link.get_attribute('href'))
time.sleep(3)
#商家id
Sj_Id=driver.find_element_by_id('car_dealerid').get_attribute('value')
#车辆ID
Car_Id=driver.find_element_by_id('car_infoid').get_attribute('value')
#车型信息
cartitle=driver.find_element_by_id('car_carname').get_attribute('value')
#车辆年限
Car_age=driver.find_element_by_id('car_age').get_attribute('value')
#行驶里程
carLong=driver.find_element_by_id('car_mileage').get_attribute('value')
#售价
carPrice=driver.find_element_by_id('car_price').get_attribute('value')
#新车价格
carNewPrice= driver.find_element_by_id("newprice").text.replace("新车含税价:",'').replace('万','')
#上牌时间
carDate=driver.find_element_by_id('car_firstregtime').get_attribute('value')
#折旧率
carZJ=round((float(carNewPrice)-float(carPrice))/float(carNewPrice),2)
#平均每年贬值
carBZ=round((float(carNewPrice)-float(carPrice))/int(Car_age),2)
#变速箱
carBSX=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[1]/li[3]').text.replace('变 速 箱','')
#排放标准
carPF=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[1]/li[4]').text.replace('排放标准','')
#排量
carPL=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[1]/li[5]').text.replace('排 量','')
#过户次数
carGH=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[2]/li[5]').text.replace('过户次数','')
#所在地
carCity=driver.find_element_by_id('citygroupid').text.replace('所 在 地','')
#发布日期
carLoginDate=driver.find_element_by_id('car_publicdatestr').get_attribute('value')
#发动机
carFDJ=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[3]/li[1]').text.replace('发 动 机','')
#颜色
carColor=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[3]/li[3]').text.replace('发 动 机','')
#燃油
carRY=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[3]/li[4]').text.replace('燃油标号','')
#驱动方式
carQD=driver.find_element_by_xpath('/html/body/div[7]/div[1]/ul[3]/li[5]').text.replace('驱动方式','')
#公司名称
carXSS=driver.find_element_by_class_name("company-name").text
#公司地址
carXSS_Address=driver.find_element_by_class_name("company-adress").text
print(carZJ)
print(carBZ)
print(carCity)
except Exception as e:
print("部分数据未提取到")
pass
try:
oneCar = [Sj_Id,Car_Id,cartitle,Car_age,carLong,carPrice,carNewPrice,carDate,carZJ,carBZ,carBSX,carPF,carPL,carGH,carCity,carLoginDate,carFDJ,carColor,carRY,carQD,carXSS,carXSS_Address]
writer.writerow(oneCar)
except OSError:
pass
#driver.close()
csvfile.close()
以上是我写的全部代码了
提示selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of <a class="carinfo" href="https://semnt.autohome.com.cn/monitor?rtype=2&dpmk=UOtguoW%2BhswqYKLEHvubvPKpCBAIKLTc7NQXBltm7ISwukJBTG7viLy9SllbLrjA4O9YT1tt6tK8vkNDQHG5gOi%2BV0FBZLyc6LoeFFRsuNLsvhwWSGu70uupVlcJKrTVq7FYRE4%2F6ISwvUNYHW28hKS%2FQk0YcbyA7epXRRw%2FuITv6EtCHz%2B%2F7rq8T0dbcP%2FQ7fsVBhA4%2F4uruE1AS37xk%2Fz4HwcQOP%2BLq7JIRUpuuNexph5ESz%2Fwhbu6H1hBb%2B%2FQpL0YE0Ft5NDv6BtETn7xk%2FnnGwEfM6%2Fcq7FYRElt7IG7u0pFSH7xk%2FzlEwEQOP%2BLvbhJQEtr8ZPq%2BR8UDTWr1ODvWE9MaOuBuLNIWVs%2Fqcj57lhPS3D%2F0OroExFbZuqEv7lDWVs%2FqMLg71hPTW3vhrq6VlcaPbDY7alAQU1k7oC%2Fp1gBHDG01auxS0NAcP%2FB%2B%2BQMHBc%2FuJOzqUtFS23thrCySkVJbP%2Bdq%2BgTAQB%2B55O4u0hESWvkiLCySkVbcP%2FE6NQVBltm%2F4C5u0tFSWztgbipVlcMPYLT%2B%2BQNV0N%2B7IG5ukpEQGXkiKunWBgYJILB%2B%2BIZEFtm6YGlqRgcHQOtw%2BDoH1dDbe2HvKdYFhYvqe7k5B4QW2bsnavoFQYNA7vQ6v8VB1tm7Z%2B5uE1AS2TviLG8Q0BLbu6DurJWVxowtNLi1A8HFX7nk%2BH%2FDgUKZvKe%2FvwNWxo0uIC%2Fs1QWFjHy1ezqFhALc%2B%2BEurtCQVZv64mwuEtNSXK1xeTnRQUPPa%2FU6OIeSEhs6IS%2FuVhZWy%2B40ublHiobNbmTs75MR1V%2BvtD97h0aCyX%2Fi7q9S0RVfrre5u8JKhA4%2F4u9u0NGSGjxk%2FrkDwcaOf%2BLu%2FY%3D#pos=2#page=1#rtype=0#isrecom=0#filter=36a469a0_0a0_0a0_0#module=10#refreshid=0#recomid=0#queryid=#cartype=20"> is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed
可以设置一等待时间,等页面加载完成后再解析