https://www.cnblogs.com/yisany/p/13524018.html
https://tyyzqmf.github.io/2018/10/22/ClickHouse%E6%9C%AC%E5%9...
--auto-generated definition 物理表(本地表)
create table tdp_main.o_vehicle_data_pre_charging_u_t
ON CLUSTER 'ck_uat'(
vin String,
tenantId Nullable(String),
tboxType Nullable(String),
sendType Nullable(String),
batteryVoltageData Nullable(String),
batteryTempData Nullable(String),
collectTime String,
receiveTime Nullable(String),
chargingStatus Nullable(String),
totalCurrent Nullable(String),
batterySoc Nullable(String),
maxTemp Nullable(String),
minTemp Nullable(String),
totalDistance Nullable(String),
maxCellVoltage Nullable(String),
minCellVoltage Nullable(String),
createTime Nullable(String)
)engine = ReplicatedMergeTree('/clickhouse/tables/o_vehicle_data_pre_charging_u_t/{layer}/{shard}', '{replica}')
ORDER BY (vin, collectTime)
SAMPLE BY intHash32(tenantId)
SETTINGS index_granularity = 8192;
-- auto-generated definition 逻辑表(分布式表)
create table o_vehicle_data_pre_charging_u_d_c
(
vin String,
tenantId Nullable(String),
tboxType Nullable(String),
sendType Nullable(String),
batteryVoltageData Nullable(String),
batteryTempData Nullable(String),
collectTime String,
receiveTime Nullable(String),
chargingStatus Nullable(String),
totalCurrent Nullable(String),
batterySoc Nullable(String),
maxTemp Nullable(String),
minTemp Nullable(String),
totalDistance Nullable(String),
maxCellVoltage Nullable(String),
minCellVoltage Nullable(String),
createTime Nullable(String)
)engine = Distributed('ck_uat', 'tdp_main', 'o_vehicle_data_pre_charging_u_t', rand());
在每个节点上创建本地表,在每个节点上创建一张分布式表去关联本地表。
CREATE TABLE ontime_d_local (FlightDate Date,Year UInt16) ENGINE = MergeTree(FlightDate, (Year, FlightDate), 8192);
CREATE TABLE ontime_d_all AS ontime_d_local ENGINE = Distributed(test, default, ontime_d_local, rand());
在任一节点向分布式表插入数据,千万注意不能向本地表插入数据.
ClickHouse实时同步MySQL数据
二、Kafka+ClickHouse物化视图方式同步
Canal-Server完成binlog的解析,并且将解析后的json写入Kafka;
Canal-Server可以根据正则表达式过滤数据库和表名,并且根据规则写入Kafka的topic;
ClickHouse使用KafkaEngine和Materialized View完成消息消费,并写入本地表;
image.png
优点:
Kafka支持水平扩展,可以根据数据规模调整partition数目;
Kafka引入后将写入请求合并,防止ClickHouse生成大量的小文件,从而影响查询性能;
Canal-Server支持规则过滤,可以灵活配置上游的MySQL实例的数据库名和表名,并且指明写入的Kafka topic名称;
缺点:
需要维护Kafka和配置规则;
ClickHouse需要新建相关的视图、Kafka Engine的外表等;
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。