代码部分:
import scrapy
from scrapy.selector import Selector
from scrapy.http import Request
from tutorial.items import TutorialItem
class DmozSpider(scrapy.Spider):
    """Crawl the news listing on www.tongchuan.gov.cn and collect article titles.

    ``parse`` handles the listing page: it follows every article link found
    under ``div.zilist_rlb`` and prints the pagination fragments.
    ``parsecontent`` handles one article page and yields a ``TutorialItem``
    whose ``title`` field holds the extracted title text.
    """

    name = "dmoz"
    # Must cover the host used in ``start_urls``; with the previous value
    # ("dmoz.org") Scrapy's offsite filtering dropped every request yielded
    # from ``parse``, so ``parsecontent`` was never called.
    allowed_domains = ["tongchuan.gov.cn"]
    start_urls = [
        "http://www.tongchuan.gov.cn/html/zxzx/tcyw/",
    ]
    # NOTE(review): nothing in this class attaches these headers to a request;
    # pass ``headers=self.headers`` to ``Request`` (or override
    # ``start_requests``) if they are meant to be sent -- confirm intent.
    headers = {
        "host": "www.tongchuan.gov.cn",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        # Was "h-CN", which is not a valid language tag.
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Connection": "keep-alive",
        "Content-Type": " application/x-www-form-urlencoded; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 "
                      "(KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1",
    }

    def parse(self, response):
        """Follow each article link on the listing page.

        Yields one ``Request`` per link, handled by ``parsecontent``, plus one
        request for a fixed sample article. Also prints the pagination
        ``<span>`` fragments for inspection.
        """
        hrefs = response.xpath('//div[@class="zilist_rlb"]/ul/li/a/@href').extract()
        for href in hrefs:
            # The hrefs may be site-relative; Request needs an absolute URL
            # (urljoin leaves already-absolute URLs unchanged).
            url = response.urljoin(href)
            print(url + "========================")
            yield Request(url, callback=self.parsecontent)

        # A bare "/html/..." path has no scheme and makes Request raise
        # ValueError, so resolve it against the current page first.
        yield Request(
            response.urljoin("/html/zxzx/tcyw/201709/180130.html"),
            callback=self.parsecontent,
        )

        pagelinks = response.xpath('//div[@class="cms_page"]/span').extract()
        for pagelink in pagelinks:
            print(pagelink)

    def parsecontent(self, response):
        """Extract the title of an article page.

        Yields a ``TutorialItem`` whose ``title`` is the list of text strings
        found in ``div.zishow_tit > span`` (empty if the page has none).
        """
        item = TutorialItem()
        # The predicate's closing bracket was misplaced in the original
        # expression (``[@class="zishow_tit"/span/text()]``), and the result
        # was never extracted to plain strings.
        item["title"] = response.xpath(
            '//div[@class="zishow_tit"]/span/text()'
        ).extract()
        print(item["title"])
        # Hand the item to Scrapy so item pipelines / feed exports receive it.
        yield item
问题已解决:原因是 URL 地址写错了。