I ran into this error while writing a Scrapy crawler: ValueError: Missing scheme in request url

The code:

import scrapy
from scrapy.selector import Selector
from scrapy.http import Request
from tutorial.items import TutorialItem


class DmozSpider(scrapy.Spider):
    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.tongchuan.gov.cn/html/zxzx/tcyw/"
    ]

    headers = {
        "host": "www.tongchuan.gov.cn",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "h-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 "
                      "(KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1"
    }

    def parse(self, response):
        selector = Selector(response)
        urls = selector.xpath('//div[@class="zilist_rlb"]/ul/li/a/@href').extract()
        for url in urls:
           print url+"========================"
           yield Request(url, callback=self.parsecontent)
        yield Request("/html/zxzx/tcyw/201709/180130.html",callback=self.parsecontent)
        #page_links=selector.xpath('//div[@class="page"]//a').extract()

        pagelinks = selector.xpath('//div[@class="cms_page"]/span').extract()
        for pagelink in pagelinks:
            print pagelink

    def parsecontent(self,response):
        selector1 = Selector(response)
        item = TutorialItem()
        item["title"] = selector1.xpath('//div[@class="zishow_tit"/span/text()]')
        print item["title"]
1 Answer

Problem solved: I had gotten the URL wrong.
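
For anyone who lands here with the same error: Scrapy raises ValueError: Missing scheme in request url whenever a Request is built from a URL that has no http:// or https:// scheme. In the spider above, both the hrefs extracted from the list page and the hard-coded "/html/zxzx/tcyw/201709/180130.html" are relative paths, so Request rejects them. Below is a minimal sketch of a corrected parse/parsecontent pair, assuming Scrapy 1.0+ (for response.urljoin and extract_first()) and that the rest of the spider (imports, class definition, items) stays as posted; the XPath predicate in parsecontent is also repositioned, while the class names on the target site are taken on trust from the question.

    def parse(self, response):
        # Extract the article links from the list page.
        urls = response.xpath('//div[@class="zilist_rlb"]/ul/li/a/@href').extract()
        for url in urls:
            # response.urljoin() turns a relative href such as
            # "/html/zxzx/tcyw/201709/180130.html" into an absolute URL
            # (with the http:// scheme), which is what Request requires.
            yield Request(response.urljoin(url), callback=self.parsecontent)

    def parsecontent(self, response):
        item = TutorialItem()
        # The predicate must close before the /span step:
        # //div[@class="zishow_tit"]/span/text()
        item["title"] = response.xpath('//div[@class="zishow_tit"]/span/text()').extract_first()
        yield item

On older Scrapy versions that lack response.urljoin, urlparse.urljoin(response.url, url) (Python 2) or urllib.parse.urljoin (Python 3) performs the same join.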
