python scrapy-selenium 中的数据提取 遍历不一个一个提取,而是一次取完问题

新手上路,请多包涵
# -*- coding: utf-8 -*-
import json

import scrapy
from scrapy import Request

from cosmetics.items import CosmeticsItem


class CosSpider(scrapy.Spider):
    name = 'cos'
    # allowed_domains = ['www.jd.com']
    # start_urls = ['https://search.jd.com/Search?keyword=%E5%8F%A3%E7%BA%A2&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&stock=1&page=1&s=54&click=0']

    def start_requests(self):
        star_url = 'https://search.jd.com/'
        yield Request(url=star_url, callback=self.parse, meta={"data": "0"})

    def parse(self, response):
        item = CosmeticsItem()
        ul_list = response.css('#J_goodsList > ul > li')
        page_next = response.css('#J_bottomPage > span.p-num > a.pn-next')
        print("ul_list is :::::::", ul_list)

        '''
        for li in ul_list:
            item = CosItemLoader(item=CosItem(), response=li)
            with open('rule.json', 'r') as f:
                data = json.load(f)
                for keys in data:
                    item.add_xpath(keys, data[keys])
            # item.add_xpath('img', './/div[@class="p-img"]/a/img/@href')
            # item.add_xpath('price', './/div[@class="p-price"]//i/text()')
            # item.add_xpath('name', './/div[@class="p-name p-name-type-2"]//em/text()')
            # item.add_xpath('commit_counts', './/div[@class="p-commit"]//a/text()')
            # item.add_xpath('shop', './/div[@class="p-shop"]/span/a/text()')
                    yield item.load_item()
        '''
        for l in ul_list:
            #al = ul.extract()
            print("ul is :::::", l.extract())
            img     = 'https:' + l.xpath('//div[@class="p-img"]/a/img/@src').extract()[0]
            price   = l.xpath('//div[@class="p-price"]//i/text()').extract()
            name    = l.xpath('//div[@class="p-name p-name-type-2"]//em/text()').extract()[0].strip(' ')
            commits = l.xpath('//div[@class="p-commit"]//a/text()').extract()[0]
            shop    = l.xpath('//div[@class="p-shop"]/span/a/text()').extract()[0]
            for field in item.fields.keys():
                item[field] = eval(field)
            yield item
        if len(page_next) > 0:
            yield Request(url=response.url, callback=self.parse, dont_filter=True, meta={"data": "2"})
        else:
            print("数据爬取完毕")

先贴spider中代码,遍历取出来的li列表,然后用xpath取数据的时候不是跟着遍历一个一个取,而是一次直接把所有的取出来了,这个是怎么回事啊,

为什么不跟着遍历一个一个取,而是一下取完?这种情况怎么解决??求大佬解决啊!!!!!

阅读 2.8k
1 个回答
新手上路,请多包涵

//div[@class="p-price"]//i/text()').extract()少加了个点

推荐问题