INFO-数据地图血缘使用

适用模块
数据地图、离线开发
具体说明
数据地图血缘
使用示例
CREATE TABLE
create table output as select * from input;

INSERT INTO/OVERWRITE
insert into table output select * from input;

insert overwrite table output select * from input;

WITH tempTable AS (...)
with temp as (select * from a);

with temp as (create table output as select * from input);

注:
hive可以解析出血缘,spark暂不支持解析血缘。

ALTER TABLE RENAME(metastore)
alter table t1 rename to t2

DROP TABLE(metastore)
drop table t

2.新增支持场景
【血缘】提升血缘覆盖率-支持import、export、load等场景

IMPORT(仅hive支持)
# 任务 -> 表
import table ex_table from '/user/data_transform/test/ex_table';

EXPORT(仅hive支持)
# 表 -> 任务
export table ex_table to '/user/data_transform/test/ex_table';

LOAD
无表血缘,有产出信息
# 任务 -> 表
load data local inpath '/home/hadoop/tlf/ex_table.txt' INTO TABLE ex_table;

ALTER TABLE ... SET LOCATION
无表血缘,有产出信息
alter table ex_table set location '/user/data_transform/test/ex_table1';

ALTER TABLE ... ADD PARTITION
无表血缘,有产出信息
alter table par_tbl add partition (dt = '2022-06-16') location '/user/data_transform/test/ex_table';

3.新增支持场景(2022.12)
主要支持Hive、MySQL、Oracle之间的读写场景,打通不同数据源之间的依赖,主要是针对Spark Jar类型任务
MySQL2Hive
val url = "jdbc:mysql://xxx"

val table = "mysqltable"

val params: Map[String, String] = Map(
  "url" -> url,
  "dbtable" -> table,
  "driver" -> "com.mysql.jdbc.Driver",
  "user" -> "xxx",
  "password" -> "xxx"
)

val properties = new Properties
params.foreach { case (key, value) => properties.setProperty(key, value) }


val df = spark.sqlContext.read.jdbc(url, table, properties)

df.write.saveAsTable("db.table")

Hive2MySQL
val df = spark.sqlContext.sql("select * from db.table")

val url = "jdbc:mysql://xxx"

val table = "mysql_lineage_02"


val params: Map[String, String] = Map(
  "url" -> url,
  "dbtable" -> table,
  "driver" -> "com.mysql.jdbc.Driver",
  "user" -> "xxx",
  "password" -> "xxx"
)

val properties = new Properties
params.foreach { case (key, value) => properties.setProperty(key, value) }

df.write.mode(SaveMode.Append).jdbc(url, table, properties)

Oracle2Hive
val url = "jdbc:oracle:thin:@xxx"

val table = "oracle_lineage_03"

val params: Map[String, String] = Map(
  "url" -> url,
  "dbtable" -> table,
  "driver" -> "oracle.jdbc.OracleDriver",
  "user" -> "xxx",
  "password" -> "xxx"
)

val properties = new Properties
params.foreach { case (key, value) => properties.setProperty(key, value) }


val df = spark.sqlContext.read.jdbc(url, table, properties)

df.write.saveAsTable("db.table")

Hive2Oracle
val df = spark.sqlContext.sql("select * from db.table")

val url = "jdbc:oracle:thin:@hadoop336.photo.163.org:49161:XE"

val table = "oracletable"


val params: Map[String, String] = Map(
  "url" -> url,
  "dbtable" -> table,
  "driver" -> "oracle.jdbc.OracleDriver",
  "user" -> "xxx",
  "password" -> "xxx"
)

val properties = new Properties
params.foreach { case (key, value) => properties.setProperty(key, value) }

df.write.mode(SaveMode.Append).jdbc(url, table, properties)


4.新增支持场景
CREATE VIEW
create view if not exists view_01 as select * from table_01;

ALTER VIEW
alter view view_01 as select * from table_02;

ALTER VIEW RENAME
alter view view_01 rename to view_02;

DROP VIEW
drop view if exists view_01

作者:焦巍