node.js的child_process执行puppeteer爬取数据的进程通信问题

很白的小白
  • 114

最近在使用puppeteer进行数据爬取,由于页面只有cookie加密,因此选择请求的方式爬取数据。
想开多个子进程去发送不同请求,实现快速爬取数据,但是在爬取过程中,遇到了进程通信的问题。

const child_process = require('child_process');
const {recLog} = require("./utils/log");

// Number of worker child processes that creatProcess() spawns.
const gChildProcessNumber = 1;

// Start spawning workers as soon as the script loads. The function declaration
// below is hoisted, and the promise it returns is not awaited here.
creatProcess()

/**
 * Spawns `gChildProcessNumber` child processes, each running
 * `npm test request.e2e.js` through a shell in the current working directory.
 *
 * @returns {Promise<void>} Resolves once every child has been spawned; it does
 *   not wait for the children to exit.
 */
async function creatProcess() {
  // `npm.cmd` only exists on Windows; other platforms expose plain `npm`.
  const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
  for (let i = 0; i < gChildProcessNumber; i += 1) {
    child_process.spawn(npmCommand, ['test', 'request.e2e.js'], {
      cwd: process.cwd(),
      shell: true,
      // stdin stays a pipe (the default). stdout/stderr are inherited so the
      // child's output is visible and the child cannot block on a pipe that
      // nobody reads.
      stdio: ['pipe', 'inherit', 'inherit'],
    });
  }
}

代码如上,先模拟了一个子进程,但是无法实现进程间通信。如果采用Socket进行进程间通信,子进程中的通信代码如下:

  // const pipe = new net.Socket({ writable: true, readable: true});
  // recLog(pipe)
  // pipe.write('data');

加上上面代码后,puppeteer执行时会出现Error: Navigation failed because browser has disconnected!错误,不清楚是什么原因?
问题:1.如何实现进程通信。2.错误的原因。
同时当我使用ipc进程通信,代码如下:
parent.js

const child_process = require('child_process');
const {recLog} = require("./utils/log");

// Number of worker child processes that creatProcess() spawns.
const gChildProcessNumber = 1;

// Start spawning workers as soon as the script loads. The function declaration
// below is hoisted, and the promise it returns is not awaited here.
creatProcess()

/**
 * Spawns `gChildProcessNumber` child processes, each running
 * `npm test request.e2e.js` through a shell with an IPC channel on fd 0, and
 * logs their lifecycle events (`spawn`, `message`, `error`, `exit`, `close`)
 * through `recLog`.
 *
 * NOTE(review): nothing in this function calls `workerProcess.send()`, so a
 * `process.on('message')` listener in the child will not fire because of this
 * code alone.
 * NOTE(review): with `shell: true` the direct child is a shell, not a Node.js
 * process. Node documents IPC with non-Node children as unsupported, so confirm
 * the channel actually reaches the test process, or consider
 * `child_process.fork()`.
 *
 * @returns {Promise<void>} Resolves once every child has been spawned and its
 *   listeners attached; it does not wait for the children to exit.
 */
async function creatProcess() {
  // `npm.cmd` only exists on Windows; other platforms expose plain `npm`.
  const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
  for (let i = 0; i < gChildProcessNumber; i += 1) {
    const workerProcess = child_process.spawn(npmCommand, ['test', 'request.e2e.js'], {
      cwd: process.cwd(),
      shell: true,
      // fd 0 still carries the IPC channel. stdout/stderr are inherited rather
      // than left as unread pipes, so the test runner's output is visible and
      // the child cannot block once a pipe buffer fills up.
      stdio: ['ipc', 'inherit', 'inherit'],
    });
    workerProcess.on('spawn', () => {
      recLog(`子进程${workerProcess.pid}衍生成功`);
    });
    // Log every message the child sends over the IPC channel.
    workerProcess.on('message', (data) => {
      recLog(data);
    });
    workerProcess.on('error', (error) => {
      recLog(`触发父进程Error事件,子进程${workerProcess.pid}出错`, 'error');
      if (error) {
        // recLog is the project's log4js-based logging wrapper.
        recLog(error);
      }
    });
    workerProcess.on('exit', (code) => {
      recLog(`触发父进程exit事件,子进程${workerProcess.pid}触发exit事件,退出码${code}`, 'error');
    });
    workerProcess.on('close', (code) => {
      recLog(`子进程${workerProcess.pid}已退出,退出码${code}`, 'error');
    });
  }
}

child.e2e.js

import Mock from 'mockjs';
import {wait} from "signale/types";
import * as http from "http";
import {logTestData, createErrorScreenShot} from '../e2e/utils';
import {storeData, BASE_URL, USER_DATA, fetchResponse, login} from './utils';
import {recLog} from "./utils/log";

describe('request.e2e.js', () => {
  // Raise the Jest timeout before any test in this file runs.
  beforeAll(async () => {
    jest.setTimeout(100000);
  });

  // Jest collects tests synchronously while the describe callback executes.
  // Both tests are therefore registered here, not from inside the
  // asynchronous 'message' listener, where calling it() is too late to be
  // collected.
  Pupeteer_Login();
  Pupeteer_getIPv4Info();

  // Acknowledge messages from the parent process over the IPC channel.
  // NOTE(review): this listener only fires if the parent calls send() on a
  // working IPC channel — confirm against the parent script.
  process.on('message', (message) => {
    recLog(message);
    process.send('test');
    recLog('执行了send');
  });
  recLog('执行结束');

  /**
   * Registers the `loginTest` Jest test, which awaits the project's `login()`
   * helper and discards its result.
   */
  function Pupeteer_Login() {
    const runLogin = async () => {
      await login();
    };
    it('loginTest', runLogin);
  }

  /**
   * Reads `document.cookie` from the page and returns everything after the
   * first ":" on that line.
   *
   * @returns {Promise<string>} The text following the first ":" in the cookie.
   * @throws {Error} If the cookie string contains no ":" separator.
   */
  async function getOnlyCookieValue() {
    const cookie = await page.evaluate(() => document.cookie);
    // Named so it does not shadow the global RegExp constructor.
    const valuePattern = /(?<value>(?<=:).*)/g;
    const match = valuePattern.exec(cookie);
    if (match === null) {
      // Fail with a clear message instead of a TypeError on `null.groups`.
      throw new Error('Unable to extract cookie value: document.cookie contains no ":" separator');
    }
    return match.groups.value;
  }

  /**
   * Registers the `getIPv4InfoTest` Jest test. The test obtains the cookie
   * value, logs it, POSTs a JSON-RPC `lan_show` request to the `/rpc` endpoint
   * with that value in the `Cookie` header, and logs the response.
   */
  function Pupeteer_getIPv4Info() {
    const runIPv4InfoRequest = async () => {
      const ssid = await getOnlyCookieValue();
      recLog(ssid);

      const response = await fetchResponse(
        'http://192.168.10.1/rpc',
        'POST',
        {
          'content-type': 'application/json',
          'Cookie': `ssid:${ssid}`,
        },
        {
          id: 1,
          jsonrpc: "2.0",
          method: "lan_show",
          params: {src_type: 1},
        },
      );
      recLog(response);
      // Persisting the response via storeData(response, 'test') is currently disabled.
    };

    it('getIPv4InfoTest', runIPv4InfoRequest);
  }
});

小白一枚,望大神告知!

回复
阅读 178
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
宣传栏