9

我需要把原来的 MongoDB 数据完整拷贝到新服务器上。查了一下,实现方式蛮多的,但我觉得都不如写个 Node 脚本来得自由方便。这种方式应该也没有跨版本不兼容的坑,至少我从 3.4 升级到 4.2 是没问题的。
脚本依赖 mongodb 包,需确保 Node.js 版本支持直接运行 async/await。
mongodb-copy.js

const {MongoClient} = require('mongodb');

// Connection string of the source server to copy from.
const baseUrl = 'mongodb://asseek:123456@xxx.xxx.xxx.xxx:27017/i_mall?authSource=admin';

// Connection string of the target server. This example has security (auth)
// enabled; adjust it to your own setup.
const targetUrl = 'mongodb://asseek:123456@127.0.0.1:27017/i_mall?authSource=admin';

// Name of the database to copy (the same name is used on both servers).
const dbName = 'i_mall';

// Names of the collections to copy; leave empty to copy every collection.
const collections = [];

// Names of the collections to skip.
const excludeCollections = [];

// Whether to delete the target collection's documents before copying.
// May be turned off if you are sure no duplicate primary keys can occur.
const dropTarget = true;

// Number of documents copied per batch, so that a large collection does not
// exhaust memory.
const sharding = 10000;

/**
 * Script entry point: copies the selected collections of `dbName` from the
 * source server (`baseUrl`) to the target server (`targetUrl`).
 *
 * For every collection that passes the `collections` / `excludeCollections`
 * filters it optionally empties the target collection (`dropTarget`) and then
 * copies the documents in batches of `sharding`, paging by ascending `_id`.
 * Per-collection and overall counters are printed to the console; errors are
 * logged rather than thrown, and both connections are always closed.
 *
 * Limitation: paging uses `{_id: {$gt: lastId}}`, and MongoDB comparison
 * operators only match values of the same BSON type as the operand, so a
 * collection whose `_id` values mix BSON types would be copied incompletely.
 */
(async () => {
  console.time('mongodb-copy');
  let collectionLength = 0;
  let insertLength = 0;
  let dropLength = 0;
  let baseClient = null;
  let targetClient = null;
  // Label used when logging a failure; switched once both connections are up.
  let errorLabel = '连接问题';
  try {
    baseClient = await MongoClient.connect(baseUrl, {useUnifiedTopology: true});
    const baseDb = baseClient.db(dbName);

    targetClient = await MongoClient.connect(targetUrl, {useUnifiedTopology: true});
    const targetDb = targetClient.db(dbName);
    errorLabel = '出错了';

    const baseCollections = await baseDb.collections();
    for (const baseCollection of baseCollections) {
      const name = baseCollection.collectionName;
      if (collections.length && !collections.includes(name)) continue;
      if (excludeCollections.includes(name)) continue;

      const targetCollection = targetDb.collection(name);
      let drop = 0;
      let insert = 0;
      if (dropTarget) {
        // Explicit empty filter: newer drivers require one. `deletedCount`
        // is available on both 3.x and later driver results (unlike `result.n`).
        const {deletedCount} = await targetCollection.deleteMany({});
        drop = deletedCount;
        dropLength += deletedCount;
      }

      // Keyset pagination on `_id`. A separate flag tracks whether a previous
      // batch exists so that falsy ids (0, '') do not restart the scan.
      let lastId;
      let hasLastId = false;
      for (;;) {
        const search = hasLastId ? {_id: {$gt: lastId}} : {};
        // The sort is essential: without it batches come back in natural
        // order and `$gt: lastId` can silently skip documents.
        const batch = await baseCollection
          .find(search)
          .sort({_id: 1})
          .limit(sharding)
          .toArray();
        if (batch.length === 0) break;

        const {insertedCount} = await targetCollection.insertMany(batch);
        insert += insertedCount;
        insertLength += insertedCount;

        lastId = batch[batch.length - 1]._id;
        hasLastId = true;
        // A short batch means the collection is exhausted; skip the extra query.
        if (batch.length < sharding) break;
      }

      collectionLength += 1;
      console.log(`[${name}] [删除${drop}] [插入${insert}]`);
    }
  } catch (e) {
    console.error(errorLabel, e);
  } finally {
    // Close whatever was opened, even when the second connection or the copy
    // failed, so the process does not hang on an open socket.
    for (const client of [baseClient, targetClient]) {
      if (!client) continue;
      try {
        await client.close();
      } catch (e) {
        console.error('出错了', e);
      }
    }
  }
  console.log(`mongodb-copy: [复制集合${collectionLength}个] [删除${dropLength}] [插入${insertLength}]`);
  console.timeEnd('mongodb-copy');
})();

直接控制台执行文件

node mongodb-copy.js

从远程库拉取数据时,瓶颈基本都在数据传输上:一个 200 万条数据的库全部复制过来花了 26 分钟。
远程库效率
测试了一下,本地拷贝一个 200 万条数据的库只用了 1 分 39 秒。
本地拷贝效率


asseek
9.4k 声望288 粉丝

认真到底,终有回响