最近在写一个爬虫程序,但是调用main()就不停的提示IndexError: list index out of range
可是在子函数进行测试的时候明明是不存在这个问题的
代码如下
from selenium import webdriver
from lxml import etree
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url='http://www.ipe.org.cn/IndustryRecord/Regulatory.aspx'
driver=webdriver.Chrome()
driver.get(url)
def parse_and_save():
s=driver.page_source
ss=etree.HTML(s)
sss=ss.xpath('//*[@id="table_con0"]/div[2]/table/tbody/tr')
with open(r'C:\Users\xiaot\Desktop\IPE.csv','w',encoding='utf-8') as f:
for tr in sss:
name=tr.xpath('./td')[1].text
location=tr.xpath('./td')[2].text+tr.xpath('./td/span')[0].text
year=tr.xpath('./td')[3].text
records=tr.xpath('./td')[4].text
f.write("{},{},{},{}\n".format(name,location,year,records))
def Page_Turner():
WebDriverWait(driver,60000000).until(EC.presence_of_element_located((By.XPATH,'//*[@id="pagination0"]/div/div/div[3]/i[3]')))
driver.find_element_by_xpath('//*[@id="pagination0"]/div/div/div[1]/select/option[4]').click()
WebDriverWait(driver,60000000).until(EC.presence_of_element_located((By.XPATH,'//*[@id="pagination0"]/div/div/div[3]/i[3]')))
parse_and_save()
driver.find_element_by_xpath('//*[@id="pagination0"]/div/div/div[3]/i[3]').click()
WebDriverWait(driver,60000000).until(EC.presence_of_element_located((By.XPATH,'/html/body/div[5]/div[2]/div[2]/div/div[2]/a')))
code=driver.find_element_by_xpath('/html/body/div[5]/div[2]/div[2]/div/div[2]/a')
if code:
while True:
val=driver.find_element_by_xpath('//*[@id="txtCode"]').get_attribute('value')
if val and len(val)==4:
break
pass
pass
driver.find_element_by_xpath('/html/body/div[5]/div[2]/div[2]/div/div[2]/a').click()
def main():
i=1
for i in range(1,3):
Page_Turner()
i+=1
print('It is done!')
main()
报错类型为
File "C:/Users/ttt.py", line 46, in parse_and_save
name=tr.xpath('./td')[1].text
IndexError: list index out of range
可是我在用Page_Turner()这个函数进行调试的时候就不会报错
请问这是为什么?
是我主函数写的有问题吗?
我这里用Firefox似乎没有问题
也许你希望追加到文件,而不是每次重写