这个是错误提示
TimeoutException Traceback (most recent call last)
Cell In[84], line 7
3 df=pd.DataFrame(columns=['用户名','状态','日期','投票','地点','评论'])
5 while True:
6 #爬取第一个页面
----> 7 name = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-info"]/a')))
8 names=[i.text for i in name]
9 #print(names,len(names))
10
11 # 注意,网页上有两个 < div class="publisher_info" 作者在第一个里面,所以此项的xpath要写得更详细
12 # 有时,作者信息是分好几个a标记写,只要第一个
13 #状态
File ~\AppData\Roaming\Python\Python311\site-packages\selenium\webdriver\support\wait.py:105, in WebDriverWait.until(self, method, message)
103 if time.monotonic() > end_time:
104 break
--> 105 raise TimeoutException(message, screen, stacktrace)
TimeoutException: Message:
这个是代码
wait = WebDriverWait(driver, 20) # 设置等待时长
df=pd.DataFrame(columns=['用户名','状态','日期','投票','地点','评论']) # 空数据框
while True:
#爬取第一个页面
name = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-info"]/a')))
names=[i.text for i in name]
#print(names,len(names))
zhuangtai = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-info"]/span[1]')))
zhuangtais=[i.text for i in zhuangtai]
data = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-info"]/span[3]')))
datas=[i.text for i in data]
#print(dates,len(dates))
# 出版社和日期xpath类似
vote = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-vote"]/span[@class="votes vote-count"]')))
votes=[i.text for i in vote]
location = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/h3/span[@class="comment-info"]/span[@class="comment-location"]')))
locations=[i.text for i in location]
#评论
comment = wait.until( EC.presence_of_all_elements_located( (By.XPATH, '//div[@class="comment"]/p[@class=" comment-content"]/span[@class="short"]')))
comments=[i.text for i in comment]
try:
df1=pd.DataFrame({'用户名':names,'状态':zhuangtais,'日期':datas,'投票':votes,'地点':location,'评论':comment})
df=pd.concat([df,df1],ignore_index=True)#把数据拼接到数据框中,组成数据库中,每列的长度必须是一样的,否则会报错
except:
pass
js = 'window.scrollTo(0,document.body.scrollHeight)' #窗口滚动到底部
driver.execute_script(js)
#点击到下一页
next_btn=wait.until(EC.element_to_be_clickable((By.XPATH, '//a[contains(text(),"后页")]' )))
if next_btn.get_attribute('href')=="?start=500&limit=20&sort=new_score&status=P&percent_type=;" : # 下一页按钮失效则跳出循环
break
next_btn.click()
# comms=int(comms)
运行不出来,求解