raise ValueError("No <form> element found in %s" % response)问题

from scrapy.spiders import CrawlSpider, Rule, Request 
from scrapy.linkextractors import LinkExtractor 
from haoduofuli.items import HaoduofuliItem
from scrapy import FormRequest 
 
account = '你的帐号'
password = '你的密码'
 
class myspider(CrawlSpider):
 
    name = 'haoduofuli'
    allowed_domains = ['haoduofuli.wang']
    start_urls = ['http://www.haoduofuli.wang/wp-login.php']
 
    def parse_start_url(self, response):

        formdate = {
                'log': account,
                'pwd': password,
                'rememberme': "forever",
                'wp-submit': "登录",
                'redirect_to': "http://www.haoduofuli.wang/wp-admin/",
                'testcookie': "1"
         }
        return [FormRequest.from_response(response, formdata=formdate, callback=self.after_login)]
 
 
    def after_login(self, response):

        lnk = 'http://www.haoduofuli.wang'
        return Request(lnk)
 
    rules = (
        Rule(LinkExtractor(allow=('\.html',)), callback='parse_item', follow=True),
    )
 
    def parse_item(self, response):
        item = HaoduofuliItem()
        try:
            item['category'] = response.xpath('//*[@id="content"]/div[1]/div[1]/span[2]/a/text()').extract()[0]
            item['title'] = response.xpath('//*[@id="content"]/div[1]/h1/text()').extract()[0]
            item['imgurl'] = response.xpath('//*[@id="post_content"]/p/img/@src').extract()
            item['yunlink'] = response.xpath('//*[@id="post_content"]/blockquote/a/@href').extract()[0]
            item['password'] = response.xpath('//*[@id="post_content"]/blockquote/font/text()').extract()[0]
            return item
        except:
            item['category'] = response.xpath('//*[@id="content"]/div[1]/div[1]/span[2]/a/text()').extract()[0]
            item['title'] = response.xpath('//*[@id="content"]/div[1]/h1/text()').extract()[0]
            item['imgurl'] = response.xpath('//*[@id="post_content"]/p/img/@src').extract()
            item['yunlink'] = response.xpath('//*[@id="post_content"]/blockquote/p/a/@href').extract()[0] 
            item['password'] = response.xpath('//*[@id="post_content"]/blockquote/p/span/text()').extract()[0] 
            return item

按照教程的说法:return Request(lnk) 这一个请求也算作 初始URL 只不过 不是 start_urls 的返回response 所以不会调用 parse_start_url 函数哦!

但实际运行中,发现还是调用了 parse_start_url, 然后到:

return [FormRequest.from_response(response, formdata=formdate, callback=self.after_login)]

就出现了:

raise ValueError("No <form> element found in %s" % response)

问题。求怎么解决。

阅读 5.7k
2 个回答
return Request(lnk, callback=self.parse_item)

这样确实可以调用parse_item,但是怎么用rules来循环呢?

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题