题目描述
爬取豆瓣电影网上电影的标题、简介和评分。
相关代码
// 请把代码文本粘贴到下方(请勿用图片代替代码)
import scrapy
class Jjj6Item(scrapy.Item):
    """Container for one scraped movie row.

    Each attribute is a plain ``scrapy.Field``; the values are assigned by
    the spider's ``parse`` method in this file.
    """

    # Text of the <p> element inside the row's title cell
    # (presumably the movie's summary line -- confirm against the page).
    include = scrapy.Field()
    # Text of the <a> element inside the row's title cell (the movie title).
    movie_name = scrapy.Field()
    # Text taken from the row's "star clearfix" block (the rating).
    remark = scrapy.Field()
import scrapy
from jjj6.items import Jjj6Item
class doubanmovieSpider(scrapy.Spider):
    """Spider that reads the movie chart page on movie.douban.com.

    For every table row whose ``class`` attribute starts with ``item`` it
    yields one ``Jjj6Item`` carrying the movie name, the descriptive
    paragraph and the rating text.
    """

    name = "movie"
    allowed_domains = ["movie.douban.com"]
    start_urls = ['https://movie.douban.com/chart']

    def parse(self, response):
        """Dump the raw page to disk and yield one item per movie row.

        Args:
            response: The Scrapy response for a URL in ``start_urls``.

        Yields:
            Jjj6Item: One item per matching ``<tr>``. A field is ``None``
            when its XPath matches nothing in that row.
        """
        # Keep a raw copy of the page so selectors can be debugged offline.
        # The context manager guarantees the file handle is closed.
        with open("movie.html", "wb") as page_dump:
            page_dump.write(response.body)

        for row in response.xpath("//tr[starts-with(@class,'item')]"):
            # A fresh item per row: reusing one instance would make every
            # yielded item share (and overwrite) the same data.
            item = Jjj6Item()
            # The class name is "pl2" (letter l), not "p12" (digit one).
            # extract_first() returns None instead of raising IndexError
            # when the node is absent.
            item['movie_name'] = row.xpath(
                './/div[@class="pl2"]/a/text()'
            ).extract_first()
            item['include'] = row.xpath(
                './/div[@class="pl2"]/p/text()'
            ).extract_first()
            # NOTE(review): confirm that span[1] is the element holding the
            # score text on the live page; if it is an empty star icon this
            # field will be None.
            item['remark'] = row.xpath(
                './/div[@class="star clearfix"]/span[1]/text()'
            ).extract_first()
            yield item
问题出在 XPath 上:`@class` 的值应该是 `pl2`(第二个字符是英文字母 l),而不是 `p12`(数字 1)。