MPP Migration Tool Deployment
Updated: 2024-12-19 16:29:14
Background
Customers need to migrate data between MPP clusters. To guarantee data consistency during such migrations, the migration tool migrate was built.
Deployment Steps
Setup
mm is the migration tool: it produces extraction tasks that copy tables in the source MPP to tables in the target MPP. The transfer is built on de's extraction capability, so the following components must be deployed alongside it:
tf (extraction task scheduling)
da (source-side data queries)
mysql (de business tables)
redis (dependency of da/de)
These components, plus de and mm itself, are started as six containers via docker run, as described below.
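All six start scripts below reference the same node IP, credentials, and base directory. A minimal helper sketch that the scripts could source (env.sh and its variable names are ours, not part of the tool):
# env.sh -- hypothetical shared settings for the start-*.sh scripts
export NODE_IP="<ip>"                    # this deployment node's IP
export BASE_DIR="$(pwd)/mpp-migrate"     # absolute base path for -v mounts
export REDIS_URL="redis://youdata@${NODE_IP}:16379"
export DB_URL="mysql://youdata:youdata@${NODE_IP}:13306/youdata"
The scripts in this document are shown with literal values; substitute your own as noted in each step.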
File Preparation
Image downloads:
59.111.178.240:20280/stable/mppmigrate/mpp-migrate.tar.gz
59.111.178.240:20280/stable/mppmigrate/da.tar.gz
59.111.178.240:20280/stable/mppmigrate/de.tar.gz
59.111.178.240:20280/stable/mppmigrate/mysql.tar.gz
59.111.178.240:20280/stable/mppmigrate/redis.tar.gz
59.111.178.240:20280/stable/mppmigrate/tf.tar.gz
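The images are published as tar.gz archives; assuming they are reachable over plain HTTP at the listed address, a fetch-and-load sketch:
# Download and load each image archive (docker load accepts gzipped tarballs)
for img in mpp-migrate da de mysql redis tf; do
  wget "http://59.111.178.240:20280/stable/mppmigrate/${img}.tar.gz"
  docker load -i "${img}.tar.gz"
done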
Then create the migrate directory tree:
mkdir mpp-migrate
mkdir -p mpp-migrate/logs/de
mkdir -p mpp-migrate/data/db
mkdir -p mpp-migrate/data/redis
mkdir -p mpp-migrate/data/tf
mkdir -p mpp-migrate/data/db/conf
mkdir -p mpp-migrate/data/db/data
mkdir -p mpp-migrate/data/db/init-script
Create an init.sql file under data/db/init-script:
# data/db/init-script/init.sql
CREATE USER IF NOT EXISTS 'youdata'@'%' IDENTIFIED BY 'youdata';
GRANT ALL PRIVILEGES ON *.* TO 'youdata'@'%' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'youdata'@'localhost' IDENTIFIED BY 'youdata';
GRANT ALL PRIVILEGES ON *.* TO 'youdata'@'localhost' WITH GRANT OPTION;
CREATE DATABASE IF NOT EXISTS youdata CHARACTER SET utf8mb4;
CREATE TABLE `youdata`.`tf_task_flow` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'id',
`name` varchar(128) NOT NULL DEFAULT '' COMMENT 'task flow name',
`flow_type` varchar(128) NOT NULL DEFAULT '' COMMENT 'task flow type',
`namespace` varchar(128) NOT NULL DEFAULT '' COMMENT 'namespace',
`queue_name` varchar(128) NOT NULL DEFAULT 'default' COMMENT 'queue name',
`target_id` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'target id: the object the flow tasks act on, e.g. a group or tag',
`target_type` varchar(32) NOT NULL DEFAULT '' COMMENT 'target type',
`priority` tinyint(4) NOT NULL DEFAULT '100' COMMENT 'priority',
`trace_id` varchar(600) NOT NULL DEFAULT '{}' COMMENT 'trace info',
`build_context` mediumtext COMMENT 'task build context, used to rebuild the task chain, JSON',
`task_graph` text COMMENT 'task graph, stored as an adjacency matrix',
`result_map` text COMMENT 'execution result map, JSON',
`status` varchar(32) NOT NULL DEFAULT 'INIT' COMMENT 'status of this run: INIT|WAITING|RUNNING|FAILED|SUCCESS|CANCEL',
`status_code` varchar(8) DEFAULT '100' COMMENT 'status code, supplements status, e.g. an error code on failure',
`execute_node` varchar(200) DEFAULT '' COMMENT 'execution node',
`fail_reason` mediumtext COMMENT 'failure reason of the task flow (if execution failed)',
`submit_user` varchar(64) DEFAULT '' COMMENT 'submitter of the task flow',
`submit_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'submit time',
`execution_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'execution start time',
`finish_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'execution finish time',
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_tid` (`target_id`),
KEY `idx_ttc` (`target_id`,`target_type`,`create_time`),
KEY `idx_sc` (`status`,`create_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='task flow';
CREATE TABLE `youdata`.`tf_task` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'id',
`trace_id` varchar(192) NOT NULL DEFAULT '0' COMMENT 'trace id',
`task_type` varchar(32) NOT NULL DEFAULT '' COMMENT 'task type',
`task_context` mediumtext COMMENT 'task context, JSON',
`flow_id` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'id of the owning flow',
`retry_times` int(11) NOT NULL DEFAULT '0' COMMENT 'retry count',
`status` varchar(32) NOT NULL DEFAULT 'INIT' COMMENT 'run status: INIT|WAITING|RUNNING|FAILED|SUCCESS|CANCEL',
`fail_reason` mediumtext COMMENT 'failure reason of the task (if execution failed)',
`begin_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'task start time',
`end_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'task end time',
`create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
PRIMARY KEY (`id`),
KEY `idx_tid` (`trace_id`),
KEY `idx_fid` (`flow_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='task table';
ALTER TABLE `youdata`.`tf_task_flow` ADD COLUMN `param` text NULL COMMENT 'task configuration parameters, JSON';
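Scripts in /docker-entrypoint-initdb.d run only when the container initializes an empty data directory (assuming this MySQL image follows the official entrypoint convention). If the database was started before init.sql was in place, the script can be applied by hand:
# Apply init.sql to an already-running container (container name is illustrative)
docker exec -i <mysql-container> mysql -uroot -p'L%)$ew=Ndq' \
  < mpp-migrate/data/db/init-script/init.sql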
Create a mysql_custom.cnf file under data/db/conf:
# data/db/conf/mysql_custom.cnf
[mysqld]
#server-id = 289503
autocommit=ON
auto_increment_increment=1
auto_increment_offset=1
back_log=3000
#basedir=/usr/local/mysql
character_set_server=utf8mb4
connect_timeout=10
core-file
#datadir=/etc/mysql/data
div_precision_increment=4
eq_range_index_dive_limit=200
event_scheduler=ON
general_log=OFF
group_concat_max_len=1024
innodb_adaptive_flushing=ON
innodb_adaptive_hash_index=ON
innodb_autoextend_increment=64
innodb_autoinc_lock_mode=2
innodb_buffer_pool_dump_pct=40
innodb_buffer_pool_instances=1
innodb_buffer_pool_load_at_startup=ON
innodb_buffer_pool_size=1073741824
innodb_change_buffering=all
innodb_checksum_algorithm=crc32
innodb_concurrency_tickets=5000
innodb_data_file_path=ibdata1:512M:autoextend
innodb_file_format=Barracuda
innodb_file_format_max=Barracuda
innodb_file_per_table
innodb_flush_log_at_trx_commit=1
innodb_flush_method=O_DIRECT
innodb_flush_neighbors=0
innodb_ft_max_token_size=84
innodb_ft_min_token_size=3
innodb_io_capacity=4000
innodb_io_capacity_max=8000
innodb_large_prefix=ON
innodb_lock_wait_timeout=5
innodb_log_buffer_size=8388608
innodb_log_file_size=536870912
innodb_lru_scan_depth=1024
innodb_max_dirty_pages_pct=75
innodb_old_blocks_pct=37
innodb_old_blocks_time=1000
innodb_online_alter_log_max_size=134217728
innodb_open_files=3000
innodb_page_cleaners=1
innodb_print_all_deadlocks=ON
innodb_purge_batch_size=300
innodb_purge_threads=4
innodb_read_ahead_threshold=56
innodb_read_io_threads=4
innodb_rollback_on_timeout=OFF
innodb_stats_method=nulls_equal
innodb_stats_on_metadata=OFF
innodb_stats_sample_pages=64
innodb_strict_mode=ON
innodb_table_locks=ON
innodb_thread_concurrency=0
innodb_thread_sleep_delay=10000
innodb_write_io_threads=4
interactive_timeout=1800
log_error=mysql-err.log
#log_output=FILE
log_queries_not_using_indexes=OFF
log_slow_admin_statements=ON
log_slow_slave_statements=ON
log_throttle_queries_not_using_indexes=10
log_timestamps=SYSTEM
long_query_time=5
loose-statistics_exclude_db=mysql;performance_schema;information_schema;test;PERFORMANCE_SCHEMA;INFORMATION_SCHEMA
loose-statistics_expire_duration=7
loose-statistics_plugin_status=1
lower_case_table_names=1
max_allowed_packet=16777216
max_connect_errors=1000
max_connections=3000
max_length_for_sort_data=1024
max_prepared_stmt_count=16382
max_write_lock_count=102400
min_examined_row_limit=100
net_read_timeout=30
net_retry_count=10
net_write_timeout=60
open_files_limit=65535
performance-schema-instrument='memory/%=COUNTED'
performance_schema=ON
pid_file=mysql.pid
query_alloc_block_size=8192
query_cache_limit=1048576
query_cache_size=3145728
query_cache_type=0
query_prealloc_size=8192
show_compatibility_56=ON
skip_external_locking
skip-name-resolve
slow_launch_time=2
slow_query_log_file=mysql-slow.log
slow_query_log=ON
socket=/tmp/mysql.sock
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
table_definition_cache=1400
table_open_cache=2000
#tmpdir=/etc/mysql/tmp_dir
tmp_table_size=2097152
transaction-isolation=REPEATABLE-READ
#user=mysql
wait_timeout=1800
# bin log config
#binlog_cache_size=32768
#binlog_checksum=CRC32
#binlog_format=ROW
#binlog_gtid_simple_recovery=TRUE
#binlog_row_image=full
#binlog_rows_query_log_events=ON
#binlog_stmt_cache_size=32768
#enforce_gtid_consistency=ON
#expire_logs_days=7
#gtid_mode=ON
#log_bin=mysql-bin.log
#log_bin_trust_function_creators=ON
#port=3306
#relay_log_recovery=ON
#sync_binlog=1
default-time_zone = '+8:00'
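Once the database container is running (see start-db.sh below), you can confirm the custom configuration was picked up by querying one of its overridden variables:
# max_connections=3000 comes from mysql_custom.cnf; stock defaults are far lower
docker exec <mysql-container> mysql -uyoudata -pyoudata \
  -e "SHOW VARIABLES LIKE 'max_connections';"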
Create an application-docker.yml file under mpp-migrate/data/tf:
# mpp-migrate/data/tf/application-docker.yml
taskflow:
  master:
    queues:
      - name: default
        namespace: default
        priority: 10
        concurrency: 10
      - name: de
        namespace: DE
        priority: 100
        concurrency: 10
        execution-timeout: ${DE_FLOW_TIMEOUT:14400000}
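Indentation is significant here, and the file is mounted into the tf container verbatim, so a syntax check before starting can save a restart (assumes Python 3 with PyYAML on the host). Note that the de queue's execution-timeout falls back to 14400000 ms (4 hours) unless DE_FLOW_TIMEOUT is set.
# Validate the YAML before mounting it into tf (hypothetical check, not part of the tool)
python3 -c "import yaml; yaml.safe_load(open('mpp-migrate/data/tf/application-docker.yml')); print('YAML OK')"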
Startup Scripts
Create the following scripts in the mpp-migrate directory.
1. start-db.sh
Note: change the -v mount paths to absolute paths. The root password below is in single quotes so the shell does not expand the $ inside it.
docker run -d -p 13306:3306 \
-e MYSQL_ROOT_PASSWORD='L%)$ew=Ndq' \
-v mpp-migrate/data/db/conf:/etc/mysql/conf.d \
-v mpp-migrate/data/db/data:/var/lib/mysql \
-v mpp-migrate/data/db/init-script:/docker-entrypoint-initdb.d \
hub.cn-east-p1.netease.com/youdata/mysql:stable
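A quick sanity check once MySQL is up (find the container id with docker ps): the init script should have created the youdata schema and both taskflow tables.
# Expect tf_task and tf_task_flow in the output
docker exec <mysql-container> mysql -uyoudata -pyoudata -e "SHOW TABLES FROM youdata;"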
2. start-redis.sh
docker run -d -p 16379:6379 \
-e ROLE=MASTER \
-e PERSISTENCE=RDB \
-v mpp-migrate/data/redis:/root/redis/data \
hub.c.163.com/yddocker/redis:6.2.4
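The REDIS URLs used by the other services embed the password youdata; assuming this image configures that password, a connectivity check should answer PONG:
# Expect: PONG
docker exec <redis-container> redis-cli -a youdata ping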
3. start-da.sh
Note: replace <ip> with the node's IP.
docker run -d -p 18080:8080 \
-e REDIS="redis://youdata@<ip>:16379" \
-e CUSTOMIZED_MEMORY="TRUE" \
-e DA_XMS=2048m \
-e DA_XMX=2048m \
hub.cn-east-p1.netease.com/youdata/da:release-2024-08-13-10-50-24-2e20ce
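da publishes port 18080 on the host; any HTTP status code in response (even 404) shows the service is listening:
# Prints an HTTP status code once da is accepting connections
curl -s -o /dev/null -w "%{http_code}\n" "http://<ip>:18080/"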
4. start-tf.sh
Note: replace <ip> with the node's IP and the -v mount path with an absolute path.
docker run -d \
-e REDIS="redis://youdata@<ip>:16379" \
-e DB="mysql://youdata:youdata@<ip>:13306/youdata" \
-e TF_PREFIX="mm" \
-v mpp-migrate/data/tf/application-docker.yml:/taskflow/application-docker.yml \
hub.cn-east-p1.netease.com/youdata/tf:release-2024-08-14-10-46-44-13935
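tf publishes no host port, so watch its logs for a clean startup instead:
# Follow tf's startup output; Ctrl-C to stop following
docker logs -f <tf-container>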
5. start-de.sh
Note: replace <ip> with the node's IP, the -v mount path with an absolute path, and fill in the ClickHouse connection settings.
docker run -d -p 18090:8090 \
-e DE_ENV=local \
-e DA=http://<ip>:18080 \
-e DE=http://<ip>:18090 \
-e REDIS="redis://youdata@<ip>:16379" \
-e DB="mysql://youdata:youdata@<ip>:13306/youdata" \
-e CSV_MODE="true" \
-e TF_PREFIX="mm" \
-e PLATFORM=http://<ip>:18082 \
-e MPP_TYPE="clickhouse" \
-e ENABLE_CK_EXCHANGE="false" \
-e CLICKHOUSE_CLUSTER="cluster2" \
-e CLICKHOUSE="clickhouse://<ck config>" \
-e CSV_MAX_BYTE_SIZE=40971520 \
-v mpp-migrate/logs/de:/DataExpedited/logs \
hub.cn-east-p1.netease.com/youdata/de:release-2024-08-13-10-50-56-e0ecb4
6. start-mm.sh
Note: replace <ip> with the node's IP.
docker run -d -p 18082:8082 \
-e REDIS="redis://youdata@<ip>:16379" \
-e DE="http://<ip>:18090/etl/load-new" \
-e CLICKHOUSE_CLUSTER="cluster2" \
hub.cn-east-p1.netease.com/youdata/mpp-migrate:stable
Once all six containers are running, the tool is deployed.
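A final check that everything is up; the expected host ports follow from the scripts above:
# All six containers should be running; host ports: 13306 mysql, 16379 redis,
# 18080 da, 18090 de, 18082 mm (tf maps no port)
docker ps --format '{{.Image}}\t{{.Ports}}\t{{.Status}}'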