Python crawler with MongoDB: how do I fix "No connection adapters were found for"?

# coding=utf-8
import time
import requests
from bs4 import BeautifulSoup
import pymongo

# get a MongoClient object
client = pymongo.MongoClient("localhost", 27017)
# get the database object to use
db = client.news
# crawl the news list pages
def start_crawler():
    page_num = 1
    while page_num <= 1:  # only crawl page 1 for now
        url = "http://www.sxcoal.com/news/seniorsearch?GeneralNewsSearch%5Bcategory_id%5D%5B0%5D=1&GeneralNewsSearch%5Bnews_industry_ids%5D=&GeneralNewsSearch%5Bnews_tag_ids%5D=&GeneralNewsSearch%5Bport_ids%5D=&GeneralNewsSearch%5Bprov_area_id%5D=&page={}&per-page=10".format(page_num)
        print(url)
        page_num += 1
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = {'User-Agent': user_agent}
        response = requests.get(url, headers=headers)
        content = response.text
        one_page = get_page_news(content)
        time.sleep(1)
        if one_page:
            to_mysql(one_page)
            time.sleep(1)
        else:
            break
    print('Finished crawling the news list')
# save news title, date and URL to the database
def to_mysql(one_page):
    # despite the name, this currently just prints the page's records
    print(one_page)
def get_page_news(content):
    soup = BeautifulSoup(content, 'lxml')
    one_page_list = []
    for i in soup.find_all("div", class_="artnr"):
        title = i.select('h4')[0].text
        url = i.a['href']
        date = i.p.find('span', class_='spandate').string.split(" ")[1]
        one_page = {'title': title, 'url': url, 'date': date, 'type': 'news', 'label': 'www.sxcoal.com'}
        db.newstitle.insert_one(one_page)
        one_page_list.append((title, url, date))
    return one_page_list
# fetch the body of each article
def get_new_body():
    link_list = get_news_linksfrom_database()
    for url in link_list:
        news_body = get_news_text(url)
        print('_id')
        # write to the database
        one_page = {'newsbody': news_body}
        db.newstitle.insert_one(one_page)
    print("Finished crawling news bodies!")
def get_news_linksfrom_database():
    result = db.newstitle.find({'label': 'www.sxcoal.com'}, {'_id': 1, 'url': 1})
    return result if result else []
def get_news_text(url):
    html = requests.get(url)
    html.encoding = html.apparent_encoding
    soup = BeautifulSoup(html.text, 'html.parser')
    try:
        return str(soup.find('div', {'id': 'Zoom'}))
    except:
        return None
if __name__ == '__main__':
    '''crawl the news summaries'''
    # start_crawler()  # start the crawler
    '''crawl the full news bodies'''
    get_new_body()

start_crawler() runs without problems, but get_new_body() raises this error:

requests.exceptions.InvalidSchema: No connection adapters were found for '{'url': 'http://www.sxcoal.com/news/4564136/info', '_id': ObjectId('5a0ab7b7d1e44d23281339e6')}'

Any help would be appreciated.
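For reference, the traceback shows that requests.get() is being handed an entire MongoDB document (a dict with 'url' and '_id' keys) rather than a URL string, because get_news_linksfrom_database() returns a cursor of documents, not URLs. A minimal sketch of the likely fix, assuming the document shape shown in the traceback (using update_one to attach the body to the existing document is one possible design; the original code inserts a new document instead):

# hypothetical fix: unpack the 'url' field from each document
def get_new_body():
    for doc in get_news_linksfrom_database():
        news_body = get_news_text(doc['url'])  # pass only the URL string
        # attach the body to the same document instead of inserting a new one
        db.newstitle.update_one({'_id': doc['_id']},
                                {'$set': {'newsbody': news_body}})
    print("Finished crawling news bodies!")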
