关于node爬虫异常中断问题

话不多说 上代码

'use strict';

// const http = require('http');
const https = require('https');
const cheerio = require('cheerio');
const iconv = require('iconv-lite')
const fs = require('fs');

let totalPage = 1; // 总页数
let bookList = []; //
const baseUrl = 'https://www.fqxsw.cc/toptoptime/1.html';

// 获取图片地址
const getBookInfo = url => {
  https.get(url, res => {
    let html = '';
    res.on('data', r => {
      html += iconv.decode(r, 'GBK'); // 页面转码 避免乱码
    });
    res.on('error', err => {
      console.log(err);
    });
    res.on('end', () => {
      const $ = cheerio.load(html);
      const domList = $('.book-coverlist .caption');
      const curPage = $('.active').children('span').text() - 0;
      totalPage = $('.last').text() - 0;
      console.log(curPage);
      Object.values(domList).forEach(dom => {
        const bookName = $(dom).children('h4').children('a').attr('title');
        const bookUrl = $(dom).children('h4').children('a').attr('href');
        const auther = $(dom).children('small').text().split(' / ')[0];
        const resume = $(dom).children('p').text();
        bookName && bookUrl ? bookList.push({
          bookName,
          bookUrl,
          auther,
          resume,
        }) : void (0);
      });
      if (curPage < totalPage) {
        const nextUrl = url.replace(/[0-9]+/g, curPage + 1);
        console.log(nextUrl);
        getBookInfo(nextUrl);
      }
    });
  });
};

(async () => {
  try {
    await getBookInfo(baseUrl);
  } catch (err) {
    console.log(err);
    throw err;
  }
})();

以下是报错:

events.js:287
      throw er; // Unhandled 'error' event
      ^

Error
Emitted 'error' event on ClientRequest instance at:
    at TLSSocket.socketErrorListener (_http_client.js:426:9)
    at TLSSocket.emit (events.js:310:20)
    at TLSSocket._emitTLSError (_tls_wrap.js:873:10)
    at TLSWrap.onerror (_tls_wrap.js:406:11)

求助了某G 某B都没有什么有价值的信息 希望有大佬能指点下

阅读 2.9k
1 个回答
推荐问题
宣传栏