MapReduce工作笔记——Hadoop MR Streaming通用模板

时间:2022-07-24
本文章向大家介绍MapReduce工作笔记——Hadoop MR Streaming通用模板,主要内容包括其使用实例、应用技巧、基本知识点总结和需要注意事项,具有一定的参考价值,需要的朋友可以参考一下。

模板概览

#!/bin/sh

#  #-----------------------------------#
#   _   _           _
#  | | | | __ _  __| | ___   ___  _ __
#  | |_| |/ _` |/ _` |/ _ \ / _ \| '_ \
#  |  _  | (_| | (_| | (_) | (_) | |_) |
#  |_| |_|\__,_|\__,_|\___/ \___/| .__/
#                                 |_|
#  #-----------------------------------#
#  Created on 2018.11.13
#  Latest modified on 2018.11.13
#  @author: wangcongying
#  #-----------------------------------#
#  MAPPER:   ${MY_PATH}/mapper.py
#  REDUCER:  ${MY_PATH}/reducer.py
#  #-----------------------------------#

# Require exactly one positional argument; otherwise print usage and abort.
# NOTE(review): the usage text names no parameter and the visible script never
# reads $1 -- confirm what the argument is meant to be.
if [ "$#" -ne 1 ]; then
    # Diagnostics go to stderr so they do not mix with regular output.
    {
        echo "***********************"
        echo "Parameter error !!!"
        echo "USAGE: ./TestHadoopJob.sh "
        echo " e.g.:"
        echo "***********************"
    } >&2
    exit 1
fi

#  ${HADOOP_HOME}: Hadoop installation root (template placeholder, fill in)
#  ${HDP}: "hadoop fs" command prefix. It is expanded UNQUOTED at call sites
#          on purpose so that it splits into the binary and its sub-command;
#          this means HADOOP_HOME itself must not contain whitespace.
HADOOP_HOME=""
HDP="$HADOOP_HOME/bin/hadoop fs"

#  ${MY_PATH}: directory of this script as it was invoked (may be relative)
#  ${CUR_DIR}: directory of this script after resolving symlinks
#  "$0" is quoted so that a script path containing spaces is handled.
MY_PATH=$(dirname "$0")
CUR_DIR=$(dirname "$(readlink -f "$0")")

#  ${Today}: current date as YYYYMMDD, used to tag the job name
Today=$(date +%Y%m%d)

#  ${INPUT}: HDFS input path (template placeholder, fill in)
#  ${OUTDIR}: HDFS output path (template placeholder, fill in)
#  ${JOB_NAME}: name given to the MapReduce job
INPUT=""
OUTDIR=""
JOB_NAME="wangcongying_${Today}"

# Refuse to run while the HDFS paths are still empty: an empty OUTDIR would
# otherwise be handed to the recursive delete and to -output below.
: "${INPUT:?INPUT (HDFS input path) must be set}"
: "${OUTDIR:?OUTDIR (HDFS output path) must be set}"

# Separator line so successive runs can be told apart in the log file.
echo '===============================================' >> "$MY_PATH/hadoop_screen.ans"

# Remove the output directory left behind by a previous run.
# $HDP is intentionally unquoted: it holds "<path>/hadoop fs" and must split.
$HDP -rmr "$OUTDIR"

# Launch the streaming job. mapper.py and reducer.py are shipped from the
# script directory via -file. Every option line must end with a backslash so
# the whole invocation is ONE command; without them each option line would be
# executed as a separate command.
# Combined stdout/stderr is shown on the terminal and appended to the log.
# Because of the pipe, the status seen by the shell is tee's, not hadoop's.
"$HADOOP_HOME/bin/hadoop" jar "$HADOOP_HOME/contrib/streaming/hadoop-streaming.jar" \
    -D mapred.job.max.map.running=500 \
    -D mapred.job.max.reduce.running=500 \
    -D mapred.ignore.badcompress="true" \
    -D mapred.use.multimembergzip="true" \
    -D mapred.max.map.failures.percent=3 \
    -D stream.num.map.output.key.fields=1 \
    -D mapred.text.key.comparator.options="-k1,1" \
    -D mapred.job.name="$JOB_NAME" \
    -D mapred.job.priority="VERY_HIGH" \
    -jobconf mapred.reduce.tasks=100 \
    -jobconf mapred.child.env="LANG=en_US.UTF-8,LC_ALL=en_US.UTF-8" \
    -mapper "mapper.py" \
    -reducer "reducer.py" \
    -input "${INPUT}" \
    -output "${OUTDIR}" \
    -file "$MY_PATH/reducer.py" \
    -file "$MY_PATH/mapper.py" \
    2>&1 | tee -a "$MY_PATH/hadoop_screen.ans"

# The job counts as successful only if the _SUCCESS marker exists under the
# output directory; exit 1 if it is missing, 0 otherwise.
# $HDP is intentionally unquoted: it holds "<path>/hadoop fs" and must split.
if ! $HDP -test -e "${OUTDIR}/_SUCCESS"; then
    echo "ERROR: Hadoop job Hubble Launcher Merge failed, quit!" >&2
    exit 1
fi
exit 0