最近在使用puppeteer进行数据爬取,由于页面只有cookie加密,因此选择请求的方式爬取数据。
想开多个子进程去发送不同请求,实现快速爬取数据,但是在爬取过程中,遇到了进程通信的问题。
const child_process = require('child_process');
const {recLog} = require("./utils/log");
// Number of child processes to start.
const gChildProcessNumber = 1;

// creatProcess is async, so a synchronous failure inside it (for example spawn
// rejecting its arguments) surfaces as a rejected promise. Log it instead of
// leaving the rejection unhandled.
creatProcess().catch((error) => recLog(error));

/**
 * Starts `gChildProcessNumber` child processes, each running
 * `npm.cmd test request.e2e.js` through a shell in the current working directory.
 *
 * @returns {Promise<void>} Resolves once spawn has been called for every child.
 */
async function creatProcess() {
  for (let i = 0; i < gChildProcessNumber; i += 1) {
    child_process.spawn('npm.cmd', ['test', 'request.e2e.js'], {
      cwd: process.cwd(),
      shell: true,
      // The default stdio is three pipes that nothing here reads, so the child's
      // output would be lost and the child can block once a pipe buffer fills.
      // Sharing the parent's stdio keeps the test output visible.
      stdio: 'inherit',
    });
  }
}
代码如上,先模拟了一个子进程,但是无法实现进程间通信。如果采用Socket进行进程间通信,子进程中的通信代码如下:
// const pipe = new net.Socket({ writable: true, readable: true});
// recLog(pipe)
// pipe.write('data');
加上上面代码后,puppeteer执行时会出现Error: Navigation failed because browser has disconnected!
错误,不清楚是什么原因?
问题:1.如何实现进程通信。2.错误的原因。
同时当我使用ipc进程通信,代码如下:
parent.js
const child_process = require('child_process');
const {recLog} = require("./utils/log");
// Number of child processes to start.
const gChildProcessNumber = 1;

// creatProcess is async, so a synchronous failure inside it surfaces as a
// rejected promise. Log it instead of leaving the rejection unhandled.
creatProcess().catch((error) => recLog(error));

/**
 * Starts `gChildProcessNumber` child processes running
 * `npm.cmd test request.e2e.js` and logs their lifecycle events and any IPC
 * messages they send.
 *
 * @returns {Promise<void>} Resolves once every child has been spawned and wired up.
 */
async function creatProcess() {
  for (let i = 0; i < gChildProcessNumber; i += 1) {
    const workerProcess = child_process.spawn('npm.cmd', ['test', 'request.e2e.js'], {
      cwd: process.cwd(),
      shell: true,
      // `['ipc']` alone puts the IPC channel on fd 0 and leaves stdout/stderr as
      // pipes that nothing reads, so the child's output is lost and the child can
      // block once a pipe buffer fills. Keep the standard streams attached to the
      // parent and open the IPC channel as an extra descriptor.
      // NOTE(review): Node only supports the IPC channel when the child is itself
      // a Node.js instance. With `shell: true` the direct child is the shell that
      // runs npm.cmd, so confirm that messages actually reach the test process.
      stdio: ['inherit', 'inherit', 'inherit', 'ipc'],
    });

    workerProcess.on('spawn', () => {
      recLog(`子进程${workerProcess.pid}衍生成功`);
    });

    // Messages sent by the child over the IPC channel.
    // NOTE(review): this parent never calls workerProcess.send(), so a child that
    // only reacts to incoming messages will wait forever.
    workerProcess.on('message', (data) => {
      recLog(data);
    });

    // Emitted when the child could not be spawned, killed, or messaged.
    workerProcess.on('error', (error) => {
      recLog(`触发父进程Error事件,子进程${workerProcess.pid}出错`, 'error');
      recLog(error);
    });

    workerProcess.on('exit', (code) => {
      recLog(`触发父进程exit事件,子进程${workerProcess.pid}触发exit事件,退出码${code}`, 'error');
    });

    workerProcess.on('close', (code) => {
      recLog(`子进程${workerProcess.pid}已退出,退出码${code}`, 'error');
    });
  }
}
child.e2e.js
import Mock from 'mockjs';
import {wait} from "signale/types";
import * as http from "http";
import {logTestData, createErrorScreenShot} from '../e2e/utils';
import {storeData, BASE_URL, USER_DATA, fetchResponse, login} from './utils';
import {recLog} from "./utils/log";
describe('request.e2e.js', () => {
  beforeAll(async () => {
    jest.setTimeout(100000);
  });

  // Jest only collects tests that are registered synchronously while the
  // describe callback runs. Registering `it()` from inside the asynchronous
  // 'message' handler is too late, so both tests are registered here and run in
  // this order: login first, then the request.
  Pupeteer_Login(); // login test
  Pupeteer_getIPv4Info(); // request test

  process.on('message', onParentMessage);
  afterAll(() => {
    // Drop the listener so it does not outlive this test file.
    process.removeListener('message', onParentMessage);
  });

  recLog('执行结束');

  /**
   * Logs a message received on the process 'message' event and acknowledges it.
   *
   * @param {*} message Payload delivered with the 'message' event.
   */
  function onParentMessage(message) {
    recLog(message);
    // process.send only exists when this process was started with an IPC channel.
    if (typeof process.send === 'function') {
      process.send('test');
      recLog('执行了send');
    }
  }

  /** Registers the test that logs in through the `login` helper. */
  function Pupeteer_Login() {
    it('loginTest', async () => {
      await login();
    });
  }

  /**
   * Reads `document.cookie` from the page and returns everything after the
   * first ':'.
   *
   * @returns {Promise<string>} The text following the first ':' in the cookie.
   * @throws {Error} If the cookie string contains no ':'.
   */
  async function getOnlyCookieValue() {
    // NOTE(review): `page` is not defined in this file; presumably it is the
    // global Puppeteer page supplied by the test environment. Confirm.
    const cookie = await page.evaluate(() => document.cookie);
    const match = /(?<value>(?<=:).*)/.exec(cookie);
    if (match === null) {
      throw new Error('document.cookie does not contain a ":"-separated value');
    }
    return match.groups.value;
  }

  /**
   * Registers the test that posts a `lan_show` JSON-RPC request, authenticated
   * with the cookie value read from the page, and logs the result.
   */
  function Pupeteer_getIPv4Info() {
    it('getIPv4InfoTest', async () => {
      const cookieValue = await getOnlyCookieValue();
      recLog(cookieValue);
      const data = {
        id: 1,
        jsonrpc: "2.0",
        method: "lan_show",
        params: {src_type: 1},
      };
      const headers = {
        'content-type': 'application/json',
        'Cookie': `ssid:${cookieValue}`,
      };
      const result = await fetchResponse('http://192.168.10.1/rpc', 'POST', headers, data);
      recLog(result);
      // storeData(result, 'test')
    });
  }
});
小白一枚,望大神告知!