Background

To meet customer demand for MPP migration while guaranteeing data consistency, the migration tool migrate was created.

Deployment Steps

Setup

mm is the migration tool: it generates extraction tasks that copy tables in the source MPP into tables in the target MPP. The transfer is built on de's extraction capability, so mm must be deployed together with:

tf (extraction task scheduling)

da (source-side data queries)

mysql (de business tables)

redis (da/de dependency)

All six containers can be started with docker run; the steps are described below.

File Preparation

Image downloads:
59.111.178.240:20280/stable/mppmigrate/mpp-migrate.tar.gz
59.111.178.240:20280/stable/mppmigrate/da.tar.gz
59.111.178.240:20280/stable/mppmigrate/de.tar.gz
59.111.178.240:20280/stable/mppmigrate/mysql.tar.gz
59.111.178.240:20280/stable/mppmigrate/redis.tar.gz
59.111.178.240:20280/stable/mppmigrate/tf.tar.gz
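
If the archives are plain HTTP downloads and each .tar.gz is a docker save archive (both assumptions; adjust to however your environment serves them), they can be fetched and loaded in one loop:

# Download and load the six image archives listed above.
# Assumes plain-HTTP access and docker-save format tarballs.
BASE=http://59.111.178.240:20280/stable/mppmigrate
for img in mpp-migrate da de mysql redis tf; do
  curl -fSLO "${BASE}/${img}.tar.gz"
  docker load -i "${img}.tar.gz"
done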

Then create the migrate directory tree:

mkdir -p mpp-migrate/logs/de
mkdir -p mpp-migrate/data/redis
mkdir -p mpp-migrate/data/tf
mkdir -p mpp-migrate/data/db/conf
mkdir -p mpp-migrate/data/db/data
mkdir -p mpp-migrate/data/db/init-script
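
For reference, running find mpp-migrate -type d | sort afterwards should print:

mpp-migrate
mpp-migrate/data
mpp-migrate/data/db
mpp-migrate/data/db/conf
mpp-migrate/data/db/data
mpp-migrate/data/db/init-script
mpp-migrate/data/redis
mpp-migrate/data/tf
mpp-migrate/logs
mpp-migrate/logs/de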

Create an init.sql file under data/db/init-script:

# init.sql (placed under data/db/init-script)

CREATE USER IF NOT EXISTS 'youdata'@'%' IDENTIFIED BY 'youdata';
GRANT ALL PRIVILEGES ON *.* TO 'youdata'@'%' WITH GRANT OPTION;
CREATE USER IF NOT EXISTS 'youdata'@'localhost' IDENTIFIED BY 'youdata';
GRANT ALL PRIVILEGES ON *.* TO 'youdata'@'localhost' WITH GRANT OPTION;
CREATE DATABASE IF NOT EXISTS youdata CHARACTER SET utf8mb4;

CREATE TABLE `youdata`.`tf_task_flow` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'id',
  `name` varchar(128) NOT NULL DEFAULT '' COMMENT 'task flow name',
  `flow_type` varchar(128) NOT NULL DEFAULT '' COMMENT 'task flow type',
  `namespace` varchar(128) NOT NULL DEFAULT '' COMMENT 'task flow namespace',
  `queue_name` varchar(128) NOT NULL DEFAULT 'default' COMMENT 'queue name',
  `target_id` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'target id; identifies the object the flow operates on, e.g. a group or tag',
  `target_type` varchar(32) NOT NULL DEFAULT '' COMMENT 'target type',
  `priority` tinyint(4) NOT NULL DEFAULT '100' COMMENT 'priority',
  `trace_id` varchar(600) NOT NULL DEFAULT '{}' COMMENT 'trace info',
  `build_context` mediumtext COMMENT 'task build context, used to restore the task chain, JSON',
  `task_graph` text COMMENT 'task graph, stored as an adjacency matrix',
  `result_map` text COMMENT 'execution result map, JSON',
  `status` varchar(32) NOT NULL DEFAULT 'INIT' COMMENT 'status of this run: INIT|WAITING|RUNNING|FAILED|SUCCESS|CANCEL',
  `status_code` varchar(8) DEFAULT '100' COMMENT 'status code supplementing status, e.g. an error code on failure',
  `execute_node` varchar(200) DEFAULT '' COMMENT 'executing node',
  `fail_reason` mediumtext COMMENT 'failure reason (if the flow failed)',
  `submit_user` varchar(64) DEFAULT '' COMMENT 'flow submitter',
  `submit_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'flow submit time',
  `execution_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'flow start time',
  `finish_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'flow finish time',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
  PRIMARY KEY (`id`),
  KEY `idx_tid` (`target_id`),
  KEY `idx_ttc` (`target_id`,`target_type`,`create_time`),
  KEY `idx_sc` (`status`,`create_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='task flow';

CREATE TABLE `youdata`.`tf_task` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT 'id',
  `trace_id` varchar(192) NOT NULL DEFAULT '0' COMMENT 'trace id',
  `task_type` varchar(32) NOT NULL DEFAULT '' COMMENT 'task type',
  `task_context` mediumtext COMMENT 'task context, JSON',
  `flow_id` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'owning flow id',
  `retry_times` int(11) NOT NULL DEFAULT '0' COMMENT 'retry count',
  `status` varchar(32) NOT NULL DEFAULT 'INIT' COMMENT 'run status: INIT|WAITING|RUNNING|FAILED|SUCCESS|CANCEL',
  `fail_reason` mediumtext COMMENT 'failure reason (if the task failed)',
  `begin_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'task start time',
  `end_time` datetime DEFAULT '0000-01-01 00:00:00' COMMENT 'task end time',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'create time',
  `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'update time',
  PRIMARY KEY (`id`),
  KEY `idx_tid` (`trace_id`),
  KEY `idx_fid` (`flow_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='task table';

ALTER TABLE `youdata`.`tf_task_flow` ADD COLUMN `param` text NULL COMMENT 'task configuration parameters, JSON';
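
Once the db container from start-db.sh (below) is up, you can confirm this script ran; a minimal check, assuming a mysql client on the host and the 13306 port mapping used later:

# Expect tf_task_flow and tf_task (with the param column) in youdata.
mysql -h127.0.0.1 -P13306 -uyoudata -pyoudata -e "SHOW TABLES IN youdata;"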

Create a mysql_custom.cnf file under data/db/conf:

# mysql_custom.cnf (placed under data/db/conf)

[mysqld]
#server-id = 289503
autocommit=ON
auto_increment_increment=1
auto_increment_offset=1
back_log=3000
#basedir=/usr/local/mysql
character_set_server=utf8mb4
connect_timeout=10
core-file
#datadir=/etc/mysql/data
div_precision_increment=4
eq_range_index_dive_limit=200
event_scheduler=ON
general_log=OFF
group_concat_max_len=1024
innodb_adaptive_flushing=ON
innodb_adaptive_hash_index=ON
innodb_autoextend_increment=64
innodb_autoinc_lock_mode=2
innodb_buffer_pool_dump_pct=40
innodb_buffer_pool_instances=1
innodb_buffer_pool_load_at_startup=ON
innodb_buffer_pool_size=1073741824
innodb_change_buffering=all
innodb_checksum_algorithm=crc32
innodb_concurrency_tickets=5000
innodb_data_file_path=ibdata1:512M:autoextend
innodb_file_format=Barracuda
innodb_file_format_max=Barracuda
innodb_file_per_table
innodb_flush_log_at_trx_commit=1
innodb_flush_method=O_DIRECT
innodb_flush_neighbors=0
innodb_ft_max_token_size=84
innodb_ft_min_token_size=3
innodb_io_capacity=4000
innodb_io_capacity_max=8000
innodb_large_prefix=ON
innodb_lock_wait_timeout=5
innodb_log_buffer_size=8388608
innodb_log_file_size=536870912
innodb_lru_scan_depth=1024
innodb_max_dirty_pages_pct=75
innodb_old_blocks_pct=37
innodb_old_blocks_time=1000
innodb_online_alter_log_max_size=134217728
innodb_open_files=3000
innodb_page_cleaners=1
innodb_print_all_deadlocks=ON
innodb_purge_batch_size=300
innodb_purge_threads=4
innodb_read_ahead_threshold=56
innodb_read_io_threads=4
innodb_rollback_on_timeout=OFF
innodb_stats_method=nulls_equal
innodb_stats_on_metadata=OFF
innodb_stats_sample_pages=64
innodb_strict_mode=ON
innodb_table_locks=ON
innodb_thread_concurrency=0
innodb_thread_sleep_delay=10000
innodb_write_io_threads=4
interactive_timeout=1800
log_error=mysql-err.log
#log_output=FILE
log_queries_not_using_indexes=OFF
log_slow_admin_statements=ON
log_slow_slave_statements=ON
log_throttle_queries_not_using_indexes=10
log_timestamps=SYSTEM
long_query_time=5
loose-statistics_exclude_db=mysql;performance_schema;information_schema;test;PERFORMANCE_SCHEMA;INFORMATION_SCHEMA
loose-statistics_expire_duration=7
loose-statistics_plugin_status=1
lower_case_table_names=1
max_allowed_packet=16777216
max_connect_errors=1000
max_connections=3000
max_length_for_sort_data=1024
max_prepared_stmt_count=16382
max_write_lock_count=102400
min_examined_row_limit=100
net_read_timeout=30
net_retry_count=10
net_write_timeout=60
open_files_limit=65535
performance-schema-instrument='memory/%=COUNTED'
performance_schema=ON
pid_file=mysql.pid
query_alloc_block_size=8192
query_cache_limit=1048576
query_cache_size=3145728
query_cache_type=0
query_prealloc_size=8192
show_compatibility_56=ON
skip_external_locking
skip-name-resolve
slow_launch_time=2
slow_query_log_file=mysql-slow.log
slow_query_log=ON
socket=/tmp/mysql.sock
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
table_definition_cache=1400
table_open_cache=2000
#tmpdir=/etc/mysql/tmp_dir
tmp_table_size=2097152
transaction-isolation=REPEATABLE-READ
#user=mysql
wait_timeout=1800
# bin log config
#binlog_cache_size=32768
#binlog_checksum=CRC32
#binlog_format=ROW
#binlog_gtid_simple_recovery=TRUE
#binlog_row_image=full
#binlog_rows_query_log_events=ON
#binlog_stmt_cache_size=32768
#enforce_gtid_consistency=ON
#expire_logs_days=7
#gtid_mode=ON
#log_bin=mysql-bin.log
#log_bin_trust_function_creators=ON
#port=3306
#relay_log_recovery=ON
#sync_binlog=1
default-time_zone = '+8:00'
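
After the db container starts, a quick way to confirm this file was mounted and read, again assuming a mysql client on the host:

# Should print 3000, the value set above (the stock default is 151).
mysql -h127.0.0.1 -P13306 -uyoudata -pyoudata -e "SHOW VARIABLES LIKE 'max_connections';"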

Create an application-docker.yml file under mpp-migrate/data/tf:

# application-docker.yml (placed under mpp-migrate/data/tf)
taskflow:
  master:
    queues:
      - name: default
        namespace: default
        priority: 10
        concurrency: 10
      - name: de
        namespace: DE
        priority: 100
        concurrency: 10
        execution-timeout: ${DE_FLOW_TIMEOUT:14400000}

Startup Scripts

Create the following scripts in the mpp-migrate directory.

1. start-db.sh

Note: change the -v mounts to absolute paths.

docker run -d -p 13306:3306 \
-e MYSQL_ROOT_PASSWORD='L%)$ew=Ndq' \
-v mpp-migrate/data/db/conf:/etc/mysql/conf.d \
-v mpp-migrate/data/db/data:/var/lib/mysql \
-v mpp-migrate/data/db/init-script:/docker-entrypoint-initdb.d \
hub.cn-east-p1.netease.com/youdata/mysql:stable
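
init.sql only runs on the container's first start and takes a moment; before starting tf and de, which both need the youdata database, wait until MySQL answers. A minimal poll, assuming a mysql client on the host:

# Block until MySQL accepts connections with the credentials from init.sql.
until mysql -h127.0.0.1 -P13306 -uyoudata -pyoudata -e "SELECT 1;" >/dev/null 2>&1; do
  echo "waiting for mysql..."
  sleep 3
done
echo "mysql is up"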

2. start-redis.sh

docker run -d -p 16379:6379 \
-e ROLE=MASTER \
-e PERSISTENCE=RDB \
-v mpp-migrate/data/redis:/root/redis/data \
hub.c.163.com/yddocker/redis:6.2.4
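
To check that redis is reachable, ping it through the mapped port. The -a password below is an assumption read off the redis://youdata@... URLs the other services use; drop or adjust it if your image authenticates differently:

# Expect PONG. Requires redis-cli on the host.
redis-cli -h 127.0.0.1 -p 16379 -a youdata ping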

3. start-da.sh

Note: replace <ip> with the node's IP address.

docker run -d -p 18080:8080 \
-e REDIS="redis://youdata@<ip>:16379" \
-e CUSTOMIZED_MEMORY="TRUE" \
-e DA_XMS=2048m \
-e DA_XMX=2048m \
hub.cn-east-p1.netease.com/youdata/da:release-2024-08-13-10-50-24-2e20ce

4. start-tf.sh

Note: replace <ip> with the node's IP address, and change the -v mount to an absolute path.

docker run -d \
-e REDIS="redis://youdata@<ip>:16379" \
-e DB="mysql://youdata:youdata@<ip>:13306/youdata" \
-e TF_PREFIX="mm" \
-v mpp-migrate/data/tf/application-docker.yml:/taskflow/application-docker.yml \
hub.cn-east-p1.netease.com/youdata/tf:release-2024-08-14-10-46-44-13935
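
The mounted application-docker.yml resolves ${DE_FLOW_TIMEOUT:14400000} from the container environment, so the de queue's flow timeout defaults to 14400000 ms (4 hours). If large tables need longer, add one -e line to this script; a sketch with an example value of 8 hours:

docker run -d \
-e REDIS="redis://youdata@<ip>:16379" \
-e DB="mysql://youdata:youdata@<ip>:13306/youdata" \
-e TF_PREFIX="mm" \
-e DE_FLOW_TIMEOUT=28800000 \
-v mpp-migrate/data/tf/application-docker.yml:/taskflow/application-docker.yml \
hub.cn-east-p1.netease.com/youdata/tf:release-2024-08-14-10-46-44-13935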

5. start-de.sh

Note: replace <ip> with the node's IP address, change the -v mount to an absolute path, and fill in the ClickHouse connection settings.

docker run -d  -p 18090:8090 \
-e DE_ENV=local \
-e DA=http://<ip>:18080 \
-e DE=http://<ip>:18090 \
-e REDIS="redis://youdata@<ip>:16379" \
-e DB="mysql://youdata:youdata@<ip>:13306/youdata" \
-e CSV_MODE="true" \
-e TF_PREFIX="mm" \
-e PLATFORM=http://<ip>:18082 \
-e MPP_TYPE="clickhouse" \
-e ENABLE_CK_EXCHANGE="false" \
-e CLICKHOUSE_CLUSTER="cluster2" \
-e CLICKHOUSE=clickhouse://<ck config> \
-e CSV_MAX_BYTE_SIZE=40971520 \
-v mpp-migrate/logs/de:/DataExpedited/logs \
hub.cn-east-p1.netease.com/youdata/de:release-2024-08-13-10-50-56-e0ecb4

6. start-mm.sh

Note: replace <ip> with the node's IP address.

docker run -d -p 18082:8082 \
-e REDIS="redis://youdata@<ip>:16379" \
-e DE="http://<ip>:18090/etl/load-new" \
-e CLICKHOUSE_CLUSTER="cluster2"  \
hub.cn-east-p1.netease.com/youdata/mpp-migrate:stable

Once all six containers are up, the tool is fully deployed.
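
A quick final check, assuming nothing else runs on the node:

# All six containers (db, redis, da, tf, de, mm) should report "Up".
docker ps --format 'table {{.Image}}\t{{.Status}}\t{{.Ports}}'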