Apache Hadoop Cluster Set Up script – Ubuntu

By | April 4, 2014

We will see, step by step, how to write an installation script for a “Single-node Hadoop Cluster Set Up” on Ubuntu.

Assumptions:

  • This script is written for Ubuntu OS, you can change it accordingly for other OS, as the skeleton remains the same.
  • User has the tar file in their system. (A file check will be done inside the script.)
  • There is a README.txt file for execution help.
  • HadoopSetUp.sh is the script to be run
  • commonFunctions.sh and properties.txt are the supporting files placed in the same folder where the HadoopSetUp.sh is residing.
  • Download Code

 

Let's dig into the details of what our script is doing!

Ex: To run, Login as root user and

$./HadoopSetUp.sh -t hadoop-1.2.0.tar.gz
#File – 1 of 4 : HadoopSetUp.sh


#!/bin/bash

########################################
######### Hadoop Installation ##########
######### Author: Puneetha B M #########
########################################

# Pin legacy POSIX behaviour for utilities that honour this variable.
export _POSIX2_VERSION=199209

# cd to the directory containing this script so the sourced helper files
# (commonFunctions.sh, properties.txt) are found regardless of the
# caller's working directory. The lost "$0" operand is restored here:
# `dirname` with no argument is an error and left wdir empty.
wdir=$(dirname "$0")
pushd "$wdir" >> /dev/null 2>&1

We source all the necessary files inside our main script. Running the source command on a script executes that script within the context of the current process, so its functions and variables become available here.


#Initialization
# Source the helper functions, the user-editable properties and the
# root user's bashrc into the current shell. (The closing brace was
# missing in the original, which swallowed the following definitions.)
function f_initialize() {
    source ./commonFunctions.sh
    source ./properties.txt
    source "$HOME/.bashrc"
}

We define some global variables that are used later.


function f_updateVariables() {
    # Command prefix used to run later steps as the dedicated Hadoop user.
    printf -v HADOOP_USER_PREFIX 'su - %s -c' "${hadoopGroupUser}"
}

# NOTE(review): placeholder definition; it is shadowed by the later
# f_pre_install further down (bash keeps the last definition parsed),
# so this body never runs.
function f_pre_install() {
variable="hello"

}

function f_displayParams() {
    # Print a summary of the effective installation parameters.
    local sep_l="----------------------------------------"
    echo -e "$sep_l"
    echo -e "\tParameters"
    echo -e "$sep_l"
    echo -e "Hadoop Group : ${hadoopGroup}"
    echo -e "Hadoop Group User: ${hadoopGroupUser}"
    echo -e "Tar File Name: ${tarFileName}"
    echo -e "$sep_l"
}

# Prompt interactively for the Hadoop tar file name. Reads the answer
# via f_readInput (which returns it in the global REPLY), asks for
# confirmation, then validates it with f_tarFileCheck; recurses until
# the user confirms a name.
function f_inputTarFileName(){
echo "Please enter the tar file name (Ex: hadoop-1.2.0.tar.gz)"

# Current ${tarFileName} (possibly empty) is offered as the default.
f_readInput "\ttar file name" ${tarFileName}
tarFileName=${REPLY}

echo -e "\tPlease confirm the tar file name"
echo -e "\t${e_bold}tar file name:${e_normal} $tarFileName"

if f_confirm "\tIs the details correct" Y; then
f_tarFileCheck $tarFileName
else
# User rejected the entered name: start over.
f_inputTarFileName
fi
}

#Set Parameters
# Parse command-line options:
#   -h                show help and exit
#   -g HadoopGroup    Hadoop group name (default from properties.txt)
#   -u HadoopUser     Hadoop user name  (default from properties.txt)
#   -t tarFileName    Hadoop tar file to install from
function f_input() {
    tarFileName=""
    # Reset so repeated calls (or a prior getopts use) parse from $1.
    OPTIND=1
    while getopts ":hg:u:t:" option; do
        case "$option" in
            h) f_info; f_usage ;;
            g) hadoopGroup="$OPTARG" ;;
            u) hadoopGroupUser="$OPTARG" ;;
            t) tarFileName="$OPTARG" ;;
            # With a leading ':' in the optstring, getopts reports a
            # missing argument as ':' — handle it distinctly instead of
            # letting the single-char glob '?' swallow it.
            :) echo "Option -$OPTARG requires an argument"; f_usage ;;
            \?) echo "Illegal option: $OPTARG"; f_usage ;;
        esac
    done

    # Normalise names to lowercase (quoted so empty values survive).
    hadoopGroup=$(f_lowercase "$hadoopGroup")
    hadoopGroupUser=$(f_lowercase "$hadoopGroupUser")
}

function f_inputCheck() {
    # Prompt for the tar file name when it was not supplied via -t.
    if [ -z "$tarFileName" ]; then
        f_inputTarFileName
    fi
    f_tarFileCheck $tarFileName

    # Ensure the Hadoop group and user exist (created when missing).
    f_groupExists ${hadoopGroup}
    f_userExists ${hadoopGroupUser}
}

###############################################
############## Check Status ###################
###############################################

# Check whether group "$1" exists. If it does, keep prompting for a new
# name; otherwise confirm with the user and create it, recording the
# final choice in the global hadoopGroup. (The original had lost every
# "$1": `getent group` with no argument lists ALL groups, so the
# "exists" branch was always taken and the function recursed forever.)
function f_groupExists(){
    if [ ! -z "$(getent group "$1")" ]; then
        #group does exist
        echo -e "Group $1 already exists. Please enter a new name";
        f_readInput "\tGroup Name" $v_HadoopGroupTmp
        v_HadoopGroupTmp=${REPLY}
        f_groupExists ${v_HadoopGroupTmp}
    else
        echo -e "Creating Group $1"
        #group does NOT exist

        echo -e "\tPlease confirm the Group name"
        echo -e "\t${e_bold}Group name:${e_normal} $1"

        if f_confirm "\tIs the details correct" Y; then
            #add group command here
            hadoopGroup="$1"
            addgroup "$1"
            echo -e "Group Name '$1' is saved"
        else
            f_readInput "\tPlease enter new Group Name" $v_HadoopGroupTmp1
            v_HadoopGroupTmp1=${REPLY}
            f_groupExists ${v_HadoopGroupTmp1}
        fi
    fi
}

# Check whether user "$1" exists. If it does, keep prompting for a new
# name; otherwise confirm with the user and create it (in the Hadoop
# group), recording the final choice in the global hadoopGroupUser.
# (Restores the lost "$1" arguments — see f_groupExists.)
function f_userExists(){
    if [ ! -z "$(getent passwd "$1")" ]; then
        #user does exist
        echo -e "User $1 already exists. Please enter a new name";
        f_readInput "\tUser Name" $v_HadoopUserTmp
        v_HadoopUserTmp=${REPLY}
        f_userExists ${v_HadoopUserTmp}
    else
        echo -e "Creating user $1"
        #user does NOT exist

        echo -e "\tPlease confirm the user name"
        echo -e "\t${e_bold}user name:${e_normal} $1"

        if f_confirm "\tIs the details correct" Y; then
            #add user command here
            hadoopGroupUser="$1"
            adduser --ingroup ${hadoopGroup} "$1"
            echo -e "User Name '$1' is saved"
        else
            f_readInput "\tPlease enter new User Name" $v_HadoopUserTmp1
            v_HadoopUserTmp1=${REPLY}
            f_userExists ${v_HadoopUserTmp1}
        fi
    fi
}

###############################################
###############################################

# Check prerequisites (JDK, ssh) and make sure $JAVA_HOME/bin is on the
# PATH in root's .bashrc.
function f_pre_install() {
    echo "Checking for Pre-Requisites"

    #Check whether JDK is correctly set up
    f_javaCheck

    #Check whether ssh is installed
    f_sshInstallCheck

    # Append the PATH setup only once so re-runs don't duplicate it, and
    # escape \$PATH so it is resolved when .bashrc is sourced rather than
    # frozen to the PATH value at install time.
    if ! grep -q "export PATH=$JAVA_HOME/bin" ~/.bashrc 2>/dev/null; then
        echo "# Setting PATH variable" >> ~/.bashrc
        echo "export PATH=$JAVA_HOME/bin:\$PATH" >> ~/.bashrc
    fi
}

# Grant sudo to the Hadoop system user and set up passwordless SSH for
# it (key generation plus authorized_keys entry on this machine).
function f_previlege() {
echo -e "\nPre Requisites Installation Started...\n";

#Give sudo privileges for Hadoop system user
adduser ${hadoopGroupUser} sudo

#Generate an SSH key for the Hadoop system user
# NOTE(review): ssh-keygen will prompt for the key file location and,
# if a key already exists, whether to overwrite — confirm this is the
# intended interactive behaviour.
${HADOOP_USER_PREFIX} "ssh-keygen -t rsa -P ''"

#Enable SSH access to your local machine with this newly created key.
${HADOOP_USER_PREFIX} "cat /home/${hadoopGroupUser}/.ssh/id_rsa.pub >> /home/${hadoopGroupUser}/.ssh/authorized_keys"

}

#Connect to localhost --> ssh
# Verify passwordless SSH works by logging in once as the Hadoop user
# (this also records localhost in known_hosts). A temporary
# 'sleep 5; logout' line is appended to the user's .bashrc so the SSH
# session terminates by itself; the original .bashrc is restored after.
function f_sshLoginCheck(){
echo -e "\n";

# Back up .bashrc before modifying it.
${HADOOP_USER_PREFIX} "cp /home/${hadoopGroupUser}/.bashrc /home/${hadoopGroupUser}/.bashrc.ssh.orig1"

echo -e "Sleeping";

${HADOOP_USER_PREFIX} "echo 'sleep 5; logout' >> /home/${hadoopGroupUser}/.bashrc"
${HADOOP_USER_PREFIX} "ssh ${hadoopGroupUser}@localhost"
# Restore the original .bashrc from the backup.
${HADOOP_USER_PREFIX} "cp -f /home/${hadoopGroupUser}/.bashrc.ssh.orig1 /home/${hadoopGroupUser}/.bashrc"

}

# Function to Disable IPv6
# Append the disable_ipv6 sysctl settings to /etc/sysctl.conf (backing
# it up first) and apply them immediately with `sysctl -p`.
function f_disable_ipv6(){
    local sysctl_path="/etc/sysctl.conf"
    #Creating a back up of the file
    cp /etc/sysctl.conf /etc/sysctl.conf.orig

    # Append only once so re-running the installer does not keep
    # duplicating the same lines.
    if ! grep -q "net.ipv6.conf.all.disable_ipv6" ${sysctl_path}; then
        echo "# disable ipv6" >> ${sysctl_path}
        echo "net.ipv6.conf.all.disable_ipv6 = 1" >> ${sysctl_path}
        echo "net.ipv6.conf.default.disable_ipv6 = 1" >> ${sysctl_path}
        echo "net.ipv6.conf.lo.disable_ipv6 = 1" >> ${sysctl_path}
    fi

    #Restart Network
    #Activate the change in the kernel without rebooting
    sysctl -p ${sysctl_path}
}

# Verify the kernel reports IPv6 as disabled; abort the script otherwise.
function f_chk_disable_ipv6(){
    local chk_disable_ipv6
    chk_disable_ipv6=$(cat /proc/sys/net/ipv6/conf/all/disable_ipv6)
    if [ "$chk_disable_ipv6" -eq 0 ]; then
        echo "Please disable IPv6"
        # Exit non-zero: the precondition failed (the original exited 0,
        # signalling success to the caller despite aborting).
        exit 1;
    fi
}

# Copy the tarball to /usr/local, unpack it, rename the extracted
# directory to /usr/local/hadoop and hand ownership to the Hadoop user.
function f_hadoop_install(){
    #Moving Hadoop package to /usr/local location
    cp "${tarFileName}" /usr/local

    pushd /usr/local
    # Work with the bare file name: -t may have been given with a path,
    # but the copy above placed the file directly in /usr/local.
    local tarBaseFile_l
    tarBaseFile_l=$(basename "${tarFileName}")

    #Extract the contents of the Hadoop package
    tar xzf "${tarBaseFile_l}"

    # hadoop-1.2.0.tar.gz -> hadoop-1.2.0 (the original awk had lost its
    # $1 field and always produced an empty name, breaking the mv below).
    local tarFileBaseName_l="${tarBaseFile_l%.tar.gz}"

    #Renaming
    mv "${tarFileBaseName_l}" hadoop

    #Make sure to change the owner of all the files to the hduser user and hadoop group
    chown -R ${hadoopGroupUser}:${hadoopGroup} hadoop

    popd
}

#Function to Update $HOME/.bashrc of Hadoop System User
# Append Hadoop environment variables, aliases and the lzohead helper
# to the Hadoop user's .bashrc (backing the file up first). JAVA_HOME
# is expanded at write time; \$PATH, \$HADOOP_HOME and the lzohead \$1
# are escaped so they are evaluated when .bashrc is sourced.
function f_update_bashrc(){
    local HADOOP_USER_HOME="/home/${hadoopGroupUser}/.bashrc"

    cp ${HADOOP_USER_HOME} /home/${hadoopGroupUser}/.bashrc.orig2

    cat >> ${HADOOP_USER_HOME} <<EOF

# Set Hadoop-related environment variables
export HADOOP_HOME=/usr/local/hadoop
# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
export JAVA_HOME=$JAVA_HOME
# Some convenient aliases and functions for running Hadoop-related commands
unalias fs &> /dev/null
alias fs="hadoop fs"
unalias hls &> /dev/null
alias hls="fs -ls"
# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# Conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# \$ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
#
lzohead () {
    hadoop fs -cat \$1 | lzop -dc | head -1000 | less
}
# Add Hadoop bin/ directory to PATH
export PATH=\$PATH:\$HADOOP_HOME/bin
EOF
}

#Function to edit hadoop-env
# Point Hadoop's environment script at the current JAVA_HOME.
function f_hadoop_env_config(){
    local hadoop_env_path="/usr/local/hadoop/conf/hadoop-env.sh"
    {
        echo "# The java implementation to use. Required."
        echo "export JAVA_HOME=$JAVA_HOME"
    } >> ${hadoop_env_path}
}

# Create Hadoop's local base/temp directory (referenced as
# hadoop.tmp.dir in core-site.xml) and give it to the Hadoop user.
function f_create_base_temp_dir(){
mkdir -p /app/hadoop/tmp
chown ${hadoopGroupUser}:${hadoopGroup} /app/hadoop/tmp
# NOTE(review): 777 is world-writable and overly permissive; since the
# directory is owned by the Hadoop user, 750 would likely suffice —
# confirm before tightening.
chmod 777 /app/hadoop/tmp
}

#Function to edit core-site.xml
# Write core-site.xml defining hadoop.tmp.dir and the default HDFS
# filesystem URI. The original string had its XML tags stripped (HTML
# extraction), producing an invalid config file; the standard Hadoop 1.x
# single-node values are reconstructed here. Optional $1 overrides the
# target path (defaults to the original location — backward compatible).
function f_core_site(){
    local core_site_path="${1:-/usr/local/hadoop/conf/core-site.xml}"
    # Quoted heredoc: written literally, no shell expansion inside.
    cat > "${core_site_path}" <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:54310</value>
    <description>The name of the default file system.
    A URI whose scheme and authority determine the FileSystem implementation.
    The uri scheme determines the config property (fs.SCHEME.impl)
    naming the FileSystem implementation class.
    The uri authority is used to determine the host, port, etc. for a filesystem.
    </description>
  </property>
</configuration>
EOF
}

#Function to edit mapred-site.xml
# Write mapred-site.xml pointing the JobTracker at localhost:54311.
# Reconstructs the XML tags stripped from the original (and the "local"
# value misspelt as "locall", whose inner quotes also broke the shell
# string). Optional $1 overrides the target path (defaults preserved).
function f_mapred_site(){
    local mapred_site_path="${1:-/usr/local/hadoop/conf/mapred-site.xml}"
    # Quoted heredoc: written literally, no shell expansion inside.
    cat > "${mapred_site_path}" <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:54311</value>
    <description>The host and port that the MapReduce job tracker runs at.
    If "local", then jobs are run in-process as a single map and reduce task.
    </description>
  </property>
</configuration>
EOF
}

#Function to edit hdfs-site.xml
# Write hdfs-site.xml setting the block replication factor to 1 (single
# node). Reconstructs the XML tags stripped from the original string.
# Optional $1 overrides the target path (defaults preserved).
function f_hdfs_site(){
    local hdfs_site_path="${1:-/usr/local/hadoop/conf/hdfs-site.xml}"
    # Quoted heredoc: written literally, no shell expansion inside.
    cat > "${hdfs_site_path}" <<'EOF'
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Default block replication.
    The actual number of replications can be specified when the file is created.
    The default is used if replication is not specified in create time.
    </description>
  </property>
</configuration>
EOF
}

#Function to start Hadoop
# Smoke-test the installation as the Hadoop user: format the HDFS
# namenode, start all daemons, show the running Java processes so the
# user can verify the daemons came up, then stop everything again.
function f_start_hadoop(){
su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/hadoop namenode -format"
su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/start-all.sh"
# jps output lets the user confirm which Hadoop JVMs are running.
su - ${hadoopGroupUser} -c "$JAVA_HOME/bin/jps"
su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/stop-all.sh"
}

# Entry point: orchestrates the full single-node Hadoop installation and
# reports the total time taken.
function main(){
    local scriptStartTime_l
    scriptStartTime_l=$(date +%s)

    #Initialization - import source files
    f_initialize

    # Check if Root user is executing the script
    f_rootUserCheck

    #Check for pre-installed software
    f_pre_install

    #Set Parameters ("$@" keeps arguments with spaces intact; $* did not)
    f_input "$@"
    f_inputCheck
    f_updateVariables
    f_displayParams

    #Pre-Install
    f_previlege
    f_sshLoginCheck

    #Function to Disable IPv6
    f_disable_ipv6
    f_chk_disable_ipv6

    #Hadoop Installation
    f_hadoop_install

    #Function to Update $HOME/.bashrc
    f_update_bashrc

    #Configuration
    f_hadoop_env_config
    f_create_base_temp_dir
    f_core_site
    f_mapred_site
    f_hdfs_site

    #Function to run Hadoop (Start and Stop)
    f_start_hadoop

    local scriptEndTime_l
    scriptEndTime_l=$(date +%s)
    # Integer subtraction only needs shell arithmetic; bc is unnecessary.
    local scriptTotalTime_l=$(( scriptEndTime_l - scriptStartTime_l ))
    echo -e "Hadoop Installation Successful. (Total Time Taken is $scriptTotalTime_l seconds )"
    echo -e "-----End of the Script-----";

    popd >> /dev/null 2>&1
}
main "$@"



#Filename(Supporting File):  commonFunctions.sh 
#!/bin/bash

# Pin legacy POSIX behaviour for utilities that honour this variable.
export _POSIX2_VERSION=199209

# Function to convert to uppercase
# Echo "$1" with a-z translated to A-Z. (The original had lost its "$1"
# and piped an empty line into tr, always returning nothing.)
function f_uppercase(){
    echo "$1" | tr '[a-z]' '[A-Z]'
}

# Function to convert to lowercase
# Echo "$1" with A-Z translated to a-z. (Restores the lost "$1".)
function f_lowercase(){
    echo "$1" | tr '[A-Z]' '[a-z]'
}

#Function to exit from the program
# Print the given error message and terminate with status 1. (The
# original echoed a lost, empty "$1", so every fatal error was silent.)
function f_die(){
    echo -e "$1";
    exit 1
}

# Function to check if Root user is executing the script
# Compares the current username (upper-cased via f_uppercase) against
# ROOT and aborts through f_die when it does not match.
function f_rootUserCheck(){
    echo -e "Checking whether root user is executing the script";

    local currentUser_l
    currentUser_l=$(id -un)
    local currentUserUc_l
    currentUserUc_l=$(f_uppercase $currentUser_l)

    [ "$currentUserUc_l" = "ROOT" ] || f_die "Only user 'root' should execute this script." 
}

# Validate the tar file "$1": re-prompt via f_inputTarFileName when it
# does not exist, die when it is not readable, otherwise accept it.
# (The original had lost the "$1" assignment, and its read-permission
# branch was unreachable because the `-f` success arm preceded it.)
function f_tarFileCheck(){
    local v_fileName_l="$1"

    echo -e "\tChecking file $v_fileName_l"

    if [ ! -f "${v_fileName_l}" ]; then
        echo -e "\t$v_fileName_l : File does not exist"
        f_inputTarFileName
    elif [ ! -r "${v_fileName_l}" ]; then
        # Must be tested before accepting the file, or it never fires.
        f_die "\t$v_fileName_l : File does not have read permission"
    else
        echo -e "\tNew tar file name will be saved"
    fi
}

#########################################################
############## Check for Installation ###################
#########################################################

#Function to check if ssh is installed
# Dies via f_die when no ssh client is found on the PATH.
function f_sshInstallCheck(){
    echo -e "Checking for ssh installation";
    # command -v finds ssh wherever it is installed; the original
    # compared `which ssh` against the hardcoded path /usr/bin/ssh and
    # failed on systems that install ssh elsewhere (e.g. /bin/ssh).
    v_ssh=$(command -v ssh)
    if [ -n "$v_ssh" ]; then
        echo "ssh is installed";
    else
        f_die "Please install ssh";
    fi
}

# Function to check if the system has been upgraded to work with JDK1.6.0 and above
# Verifies that java exists, parses its version, requires >= 1.6.0,
# requires JAVA_HOME to be set and that it points at a JDK (javac
# present), dying via f_die otherwise. (Restores the awk field numbers
# the original had lost, which made every parse step return empty.)
function f_javaCheck(){
    echo -e "Checking for JAVA 1.6.0 or higher";

    local msgInstall="Please install java 1.6.0 or higher before running this script."
    local msgUpgrade="JAVA has to be upgraded to version 1.6.0 or higher before running this script."

    javaInstalled=`which java 2>&1`
    if [ "$?" -ne 0 ]; then
        f_die "$msgInstall"
    fi

    javaFullVerInfo=`java -fullversion 2>&1`
    if [ "$?" -ne 0 ]; then
        f_die "$msgInstall"
    fi

    # `java -fullversion` prints e.g.: java full version "1.6.0_45-b06"
    # Field 4 is the quoted version token.
    javaFullVerInfo=`java -fullversion 2>&1 | awk '{print $4}' 2>&1`
    # Strip the quotes and normalise '-' and '_' to '.' => 1.6.0.45.b06
    javaVerInfo=`echo $javaFullVerInfo | sed -e 's/"/''/g' -e 's/-/./g' -e 's/_/./g'`

    if [ "$javaVerInfo" = "" ]; then
         f_die "$msgInstall"
    fi
    # Split into major / minor / patch components.
    javaSubVer1=`echo $javaVerInfo | awk -F. '{print $1}'`
    javaSubVer2=`echo $javaVerInfo | awk -F. '{print $2}'`
    javaSubVer3=`echo $javaVerInfo | awk -F. '{print $3}'`

    # NOTE(review): assumes a 1.x-style version string; modern Java
    # version schemes (e.g. "10.0.1") would fail the minor check.
    if [ ! $javaSubVer1 -ge 1 -o ! $javaSubVer2 -ge 6 -o ! $javaSubVer3 -ge 0 ]; then
        f_die "$msgUpgrade"
    fi

    echo -e "Found java $javaVerInfo installed on this system"

    if [ "$JAVA_HOME" = "" ]; then
    	f_die "Please set JAVA_HOME Environment variable before starting this script"
    fi

    # JDK (not just JRE) check: javac must exist under $JAVA_HOME/bin.
    local javacFlag_l=`find $JAVA_HOME/bin -name 'javac'`
    local javacFlag_l=`basename ${javacFlag_l}`
    if [ "$javacFlag_l" != "javac" ]; then
    	f_die "Please check whether java jdk is installed properly. We are unable to find javac executable file. It seems like you have installed only JRE. Also check whether JAVA_HOME environment variable is set properly"
    fi
}

##########################################
############## General ###################
##########################################

# Define the ANSI escape sequences used by the installer's output.
# Values are stored literally and interpreted later by `echo -e`.
function f_decEscapeCharacters(){
    # Text attributes.
    e_normal="\033[0m"
    e_bold="\033[1m"
    e_underline="\033[4m"

    # Foreground colours.
    e_red="\033[0;31m"
    e_green="\033[0;32m"
    e_blue="\033[0;34m"

    # Composite status markers: "[ OK ]" and "[ Failed ]".
    e_success="${e_bold}[${e_normal} ${e_green}OK${e_normal} ${e_bold}]${e_normal}"
    e_failure="${e_bold}[${e_normal} ${e_red}Failed${e_normal} ${e_bold}]${e_normal}"
}

# Print the installer banner (shown for the -h flag).
function f_info(){
    local border_l="========================================================================================================================"
    echo -e "$border_l"
    echo -e "\t\t\t${e_bold}Hadoop Installer${e_normal}"

    echo -e "\tThis script will install Hadoop and setup single node cluster."
    echo -e "\t${e_red}Only root user should execute this script${e_normal}" 
    echo -e "$border_l"
}

# Print command-line usage help and exit 0. (The original called
# `basename ""` — the "$0" argument had been lost — so the script name
# was always blank in the usage line.)
function f_usage(){
    local scriptName=$(basename "$0")
    echo -e "${e_bold}USAGE${e_normal}"
    echo -e "\t$scriptName [-h] [-g ${e_underline}HadoopGroup${e_normal}] [-u ${e_underline}HadoopUser${e_normal}] -t ${e_underline}tarFileName${e_normal}"

    echo -e "${e_bold}OPTIONS${e_normal}"

    echo -e "\t${e_bold}-h${e_normal}\t\t\tHelp - Flag used to display 'usage help' for the script"    

    echo -e "\n\t${e_bold}-g ${e_underline}HadoopGroup${e_normal}\t\tHadoop Group Name - This group will be created (if not exists)"
    echo -e "\t\t\t\t  If HadoopGroup is not specified it defaults to ${e_bold}'hadoop'${e_normal}"

    echo -e "\n\t${e_bold}-u ${e_underline}HadoopUser${e_normal}\t\tHadoop User Name - This user will be created (if not exists)"
    echo -e "\t\t\t\t  If HadoopUser is not specified it defaults to ${e_bold}'hduser'${e_normal}"    

    echo -e "\n\t${e_bold}-t ${e_underline}tarFileName${e_normal}\t\tHadoop Tar File Name - This tar file will be used"
    echo -e "\t\t\t\tto set-up Hadoop environment. If tarFileName is not specified, then"
    echo -e "\t\t\t\tyou will be prompted to specify during the execution of the script"    

    echo -e "${e_bold}AUTHOR${e_normal}"
    echo -e "\t ${e_green}${e_bold}Puneetha B M${e_normal} - puneethabm@gmail.com \n"
    exit 0;
}

##########################################################
############## Input Related Functions ###################
##########################################################
# Read Input using prompt
function f_readInput(){
    local v_promptMsg=""
    local v_defaultVal="${2:-}"

    echo -e -n "${v_promptMsg} ${e_bold}[${e_normal} ${e_blue}${e_bold}${v_defaultVal}${e_normal} ${e_bold}]${e_normal} "
    read REPLY

    if [ -z "${REPLY}" ]; then
        REPLY=${v_defaultVal}
    fi
}

# Y/N Prompt
function f_confirm() {
    local v_promptMsg=""
    local v_defaultVal="${2:-}"

    if [ "${v_defaultVal}" = "Y" ]; then
        v_defaultPrompt="${e_blue}${e_bold}Y${e_normal}/n"
    elif [ "${v_defaultVal}" = "N" ]; then
        v_defaultPrompt="y/${e_blue}${e_bold}N${e_normal}"
    else
        v_defaultPrompt="y/n"
    fi

    echo -e -n "${v_promptMsg} ${e_bold}[${e_normal} ${v_defaultPrompt} ${e_bold}]${e_normal} "
    read REPLY

    if [ -z "${REPLY}" ]; then
        REPLY=${v_defaultVal}
    fi

    case "$REPLY" in
        Y*|y*) return 0 ;;
        N*|n*) return 1 ;;
    esac
}

##########################################################
##########################################################



#Filename: properties.txt


# Default parameters sourced by HadoopSetUp.sh via f_initialize;
# override on the command line with -g (group) and -u (user).
hadoopGroup="hadoop"
hadoopGroupUser="hduser"



Leave a Reply

Your email address will not be published. Required fields are marked *