import scrapy
import pandas as pd
class searchinfo(scrapy.Spider):
    """Spider that requests every link listed in ``./files/baidusearch.csv``.

    The CSV is read with pandas and must contain a ``link`` column. Each cell
    of that column may hold several URLs separated by newlines; every usable
    URL is requested and the response is handed to :meth:`parse`.
    """

    name = "searchinfo"
    allowed_domains = []
    # Not read by start_requests() below, which builds its requests from the CSV.
    start_urls = ['https://www.baidu.com/']
    custom_settings = {
        # NOTE(review): RANDOM_DELAY is not one of Scrapy's documented built-in
        # settings; presumably consumed by a project middleware -- confirm.
        "RANDOM_DELAY": 10
    }

    # Number of leading CSV rows whose links are requested. The original code
    # hard-coded this as ``if j == 2: break``. Set to None to process all rows.
    MAX_ROWS = 2

    def __init__(self, *args, **kwargs):
        """Load the CSV of links and prepare the request headers.

        Positional and keyword arguments are forwarded to ``scrapy.Spider`` so
        the spider can still be built with spider arguments (``-a key=value``).
        """
        super().__init__(*args, **kwargs)
        self.data_list = pd.read_csv('./files/baidusearch.csv', encoding="utf-8")
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"}

    def start_requests(self):
        """Yield one request per usable URL found in the ``link`` column.

        Only the first ``MAX_ROWS`` rows are examined. Cells that are missing
        or not strings are skipped. Each cell is split on newlines and every
        entry is stripped of surrounding whitespace before use.

        A URL that ``scrapy.Request`` rejects with ``ValueError`` is logged and
        skipped. Without this guard the exception would propagate out of this
        generator and end it, so no link after the bad one would be requested.
        """
        for row_index, cell in enumerate(self.data_list['link']):
            if self.MAX_ROWS is not None and row_index >= self.MAX_ROWS:
                break
            # NaN (missing) cells are floats, so this also filters them out.
            if not isinstance(cell, str):
                continue
            for i, raw_link in enumerate(cell.split('\n')):
                # Strip stray spaces and '\r' left over from Windows line endings.
                link = raw_link.strip()
                if not link:
                    continue
                print('打印链接')
                print(link)
                print(i)
                try:
                    request = scrapy.Request(
                        link,
                        dont_filter=True,
                        headers=self.headers,
                        callback=self.parse,
                    )
                except ValueError as exc:
                    self.logger.warning("Skipping invalid URL %r: %s", link, exc)
                    continue
                yield request

    def parse(self, response):
        """Print the body text and the URL of a downloaded response."""
        print(response.text)
        print(response.url)
我用这个 for 循环请求 link_list 中的链接,为什么只能请求到第一个链接?第一个链接之后的链接都请求不到。