概述
本文主要介绍如何通过 DataX 将 Hive 数据迁移到崖山分布式数据库(YashanDB)。

环境
源Hive版本:3.1.3
目标YashanDB版本:23.2.3.100

建表脚本

-- Hive source table.
-- Stored as plain text with \001 (Ctrl-A) as the field separator; the DataX
-- hdfsreader job has to read it with the same delimiter
-- ("fieldDelimiter": "\u0001").
CREATE TABLE IF NOT EXISTS product (
    product_no   char(5),
    product_name varchar(30),
    cost         double,
    price        double
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'
STORED AS textfile;

 

-- YashanDB target table: one column per Hive source column, in the same order.
-- NOTE(review): the table name is unqualified here, while the DataX job writes
-- to SALES.PRODUCT -- presumably this is executed as the SALES user; confirm.
CREATE TABLE product (
    product_no   CHAR(5),
    product_name VARCHAR2(30),
    cost         NUMBER,
    price        NUMBER
);

Hive 表和 DataX 数据类型映射
image.png

Hive 同步到崖山(YashanDB)的 DataX job 配置

{
    "job": {
        "setting": {
            "speed": { "channel": "1" }
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "defaultFS": "hdfs://127.0.0.1:8020",
                        "path": "/usr/hive/warehouse/sales.db/product",
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": "\u0001",
                        "column": [
                            { "index": 0, "type": "string" },
                            { "index": 1, "type": "string" },
                            { "index": 2, "type": "double" },
                            { "index": 3, "type": "double" }
                        ]
                    }
                },
                "writer": {
                    "name": "yashandbwriter",
                    "parallel": { "binder": 6 },
                    "parameter": {
                        "username": "sales",
                        "password": "sales",
                        "writeMode": "bulkinsert",
                        "batchSize": 4096,
                        "batchesPerTxn": 1000,
                        "batchError": true,
                        "session": [],
                        "preSql": ["truncate table SALES.PRODUCT"],
                        "column": ["PRODUCT_NO", "PRODUCT_NAME", "COST", "PRICE"],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:yasdb://127.0.0.1:1688/yashandb",
                                "table": ["SALES.PRODUCT"]
                            }
                        ]
                    }
                }
            }
        ]
    }
}

执行同步
python bin/datax.py job/hive2yashandb.json


YashanDB
1 声望0 粉丝

崖山数据库系统YashanDB是深圳计算科学研究院自主设计研发的新型数据库管理系统,融入原创的有界计算、近似计算、并行可扩展和跨模融合计算理论,可满足金融、政企、能源等关键行业对高性能、高并发及高安全性的...