Requirement:
Export data from Hive back to Oracle via Sqoop, while recording the progress of the export in a separate table: when it ran and whether it is preparing, running, finished, or failed, because downstream jobs use this flag to trigger further processing. This can be implemented with a shell script combined with Sqoop's --query option.
IS_FLAG values: 1 = preparing, 2 = running, 0 = finished, -1 = failed with an error.
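For reference, the status table shuzhu.jilubiao that the script writes to could be created roughly as below. Only IS_FLAG, TASK_DESC, START_DT and END_DT are named explicitly in the script, so the first two column names (TASK_NAME, DATA_DATE) and all column types are assumptions of mine; the command reuses the connection variables defined in the script that follows.
sqoop eval \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--query "create table SHUZHU.JILUBIAO (TASK_NAME varchar2(100), DATA_DATE varchar2(8), IS_FLAG varchar2(2), START_DT date, END_DT date, TASK_DESC varchar2(200))"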
#!/bin/bash
source ExitCodeCheck.sh
# queue name
queueName=$1
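# the 2nd argument is a data date in yyyymmdd form; bash substring expansion splits it into year, month and day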
y=${2:0:4}
m=${2:4:2}
d=${2:6:2}
# 1 = full load, 2 = incremental
flag=$3
DIR=`dirname "$0"`
DIR=`cd "${DIR}" > /dev/null; pwd`
oracle_connection=jdbc:oracle:thin:@ip:1531:kuming
oracle_name=name
oracle_password=password
# define the target Oracle table name
TABLE_NAME=SHUZHU.BIAOMING
## step 1: mark status as preparing (1)
echo "status: preparing (1)"
sqoop eval -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--query "insert into shuzhu.jilubiao values ('biaoji',to_char(sysdate,'yyyymmdd'),'1',sysdate,NULL,'开始清空表')"
exitCodeCheck $?
## call the stored procedure that truncates the target table
echo "begin truncating target table data"
sqoop eval -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--query "BEGIN SHUZHU.CUNCHUGUOCHENGMING(truncate_table => 'XXXX');end;"
exitFlag=$?
echo "finished to truncate source table data"
## step 2: mark status as running (2), or -1 if the truncate failed
exitFlagOracle="2"
if [ ${exitFlag} -ne 0 ]
then
exitFlagOracle="-1"
fi
echo "status: running (2)"
sqoop eval -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--query "update SHUZHU.JILUBIAO set IS_FLAG="\'${exitFlagOracle}\'",TASK_DESC='清空表完成,同步数据开始。。。' where trunc(start_dt,'DD') = trunc(sysdate,'DD') and IS_FLAG!='0' and IS_FLAG!='-1'"
exitCodeCheck $?
exitCodeCheck ${exitFlag}
unset exitFlag
unset exitFlagOracle
##########
echo "begin to export data to oracle"
echo "flag 1全量 2 增量 flag: ${flag}"
exitFlag=0
if [ ${flag} -eq 1 ]
then
sqoop export -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--table ${TABLE_NAME} \
--input-fields-terminated-by '\001' \
--input-lines-terminated-by '\n' \
--input-null-non-string '\\N' \
--input-null-string '\\N' \
--num-mappers 20 \
--export-dir /user/hive/warehouse/kuming.db/biaoming
exitFlag=$?
fi
## biaoming_delta is a partitioned staging table that each day's data is synced into; see the earlier post on Sqoop incremental updates for why this staging table is used
if [ ${flag} -eq 2 ]
then
sqoop export -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--table ${TABLE_NAME} \
--input-fields-terminated-by '\001' \
--input-lines-terminated-by '\n' \
--input-null-non-string '\\N' \
--input-null-string '\\N' \
--num-mappers 20 \
--export-dir /user/hive/warehouse/kuming.db/biaoming_delta/y=${y}/m=${m}/d=${d}
exitFlag=$?
fi
echo "finished exporting data to oracle"
## step 3: mark status as finished (0), or -1 if the export failed
exitFlagOracle="0"
if [ ${exitFlag} -ne 0 ]
then
exitFlagOracle="-1"
fi
echo "final status: ${exitFlagOracle}"
sqoop eval -D mapred.job.queue.name=${queueName} \
--connect ${oracle_connection} \
--username ${oracle_name} \
--password ${oracle_password} \
--query "update SHUZHU.JILUBIAO set IS_FLAG="\'${exitFlagOracle}\'",TASK_DESC='hadoop导入数据到oracle结束',END_DT=sysdate where trunc(start_dt,'DD') and IS_FLAG!='0' and IS_FLAG!='-1'"
exitCodeCheck $?
exitCodeCheck ${exitFlag}
unset exitFlag
unset exitFlagOracle
echo "执行完成"