TypeError: sequence item 0: expected str instance, bytes found

# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request
from urllib import parse
import datetime
from ArticleSpider.items import JobboleArticleItem
from ArticleSpider.utilss.common import get_md5
class JobboleSpider(scrapy.Spider):
    name = 'jobbole'
    allowed_domains = ['blog.jobbole.com']
    start_urls = ['http://blog.jobbole.com/all-posts/']

    def parse(self, response):

        post_urls = response.css("#archive .floated-thumb .post-thumb a::attr(href)").extract()
        for post_url in post_urls:
            yield Request(url=parse.urljoin(response.url,post_url),callback=self.parse_detail)
        #next_url = response.css(".next.page-numbers::attr(href)").extract_first("")
        #print(next_url)
        #if next_url:
         #   yield Request(url=parse.urljoin(response.url,next_url),callback=self.parse)
        # 只是传递并未调用


    def parse_detail(self, response):
        article_item = JobboleArticleItem()
        article_item["url_object_id"] = get_md5(response.url)
        title = response.xpath(' // *[ @ class = "entry-header"] / h1 / text()').extract()
        create_time = response.css("p.entry-meta-hide-on-mobile::text").extract()[0].strip().replace("·","").strip()
        # try:
        #     create_time = datetime.datetime.strptime(create_time, "%Y/%m/%d").date()
        # except Exception as e:
        #     create_time = datetime.datetime.now().date()

        article_item["title"] = title
        article_item["create_time"] = create_time

        yield article_item

这里是主程序,下面是pipeline中的代码

import codecs
import json
import MySQLdb
import MySQLdb.cursors
class MysqlPipeline(object):
    def __init__(self):
        self.conn = MySQLdb.connect('localhost', 'root', 'root', 'mysql', charset='utf8', use_unicode=True)
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        insert_sql = """
        insert into title(title,create_time,url_object_id)
        VALUES (%s, %s, %s)
        """
        self.cursor.execute(insert_sql, (item["title"], item["create_time"], item["url_object_id"]))
        self.conn.commit()

我描述下出现的问题,现在的情况是如果不执行pipeline中写入mysql,程序可以实现抓取文章名和文章发表时间,如果在setting中设置执行pipeline,则会出现报错

2017-08-01 14:09:37 [scrapy.core.scraper] ERROR: Error processing {'create_time': '2017/07/31',
 'title': ['Neo4j 图数据库基础'],
 'url_object_id': '1a8e6c64968ed6db401b5769221f9b4f'}
Traceback (most recent call last):
  File "C:\python3\lib\site-packages\twisted\internet\defer.py", line 653, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "C:\Users\微软中国\ArticleSpider\ArticleSpider\pipelines.py", line 36, in process_item
    self.cursor.execute(insert_sql, (item["title"], item["create_time"], item["url_object_id"]))
  File "C:\python3\lib\site-packages\MySQLdb\cursors.py", line 234, in execute
    args = tuple(map(db.literal, args))
  File "C:\python3\lib\site-packages\MySQLdb\connections.py", line 316, in literal
    s = self.escape(o, self.encoders)
  File "C:\python3\lib\site-packages\MySQLdb\converters.py", line 90, in quote_tuple
    return "(%s)" % (','.join(escape_sequence(t, d)))
TypeError: sequence item 0: expected str instance, bytes found

根据报错的意思是,我写入的数据是byte类型,可是我debug的话
clipboard.png明明是str啊,mysql数据库不太会但也设置上了
clipboard.png也设置主键了 ,求大神看看这个同步写入mysql到底哪错了???

阅读 9k
1 个回答
日志信息里已经体现了呀,你的title是个列表,不是字符串
插入的时候应该这么写item["title"][0]或是在这之前处理成字符串

clipboard.png

撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题