We will see, step by step, how to write an installation script for a “Single-node Hadoop Cluster Set Up” on Ubuntu.
Assumptions:
- This script is written for Ubuntu OS, you can change it accordingly for other OS, as the skeleton remains the same.
- User has the tar file on their system. (A file-existence check is done inside the script.)
- There is a README.txt file for execution help.
- HadoopSetUp.sh is the script to be run
- commonFunctions.sh and properties.txt are the supporting files placed in the same folder where the HadoopSetUp.sh is residing.
- Download Code
Let's dig into the details of what our script is doing!
Ex: To run, Login as root user and
$./HadoopSetUp.sh -t hadoop-1.2.0.tar.gz
#File – 1 of 4 : HadoopSetUp.sh
#!/bin/bash
########################################
######### Hadoop Installation ##########
######### Author: Puneetha B M #########
########################################

# Request POSIX.2-1992 behavior from utilities that honor this variable
# (keeps older option syntaxes of tools like head/tail working).
export _POSIX2_VERSION=199209

# Resolve the directory this script lives in and cd there, so the
# relative paths to commonFunctions.sh and properties.txt work no matter
# where the script is invoked from.
# NOTE(review): the argument to dirname was lost in transcription;
# `dirname "$0"` is the standard form — verify against the original.
wdir=$(dirname "$0")
pushd "$wdir" >> /dev/null 2>&1
We source all the necessary files inside our main script. Running the `source` command on a script executes that script within the context of the current process, so the functions and variables it defines become available here.
#Initialization: pull in the helper-function library, the configurable
#properties, and the root user's environment. Each file is sourced (run
#in the current shell) so its definitions are visible to this script.
#NOTE(review): the closing brace was lost in transcription; restored.
function f_initialize() {
    source ./commonFunctions.sh
    source ./properties.txt
    source $HOME/.bashrc
}
We define some global variables that are used later.
#Global variables used later in the run.
function f_updateVariables() {
    #Command prefix used to run a command as the Hadoop system user.
    HADOOP_USER_PREFIX="su - ${hadoopGroupUser} -c"
}

#NOTE(review): placeholder definition; it is shadowed by the full
#f_pre_install defined further below (bash keeps the later definition).
function f_pre_install() {
    variable="hello"
}

#Print the effective installation parameters for the user to review.
function f_displayParams() {
    echo -e "----------------------------------------";
    echo -e "\tParameters";
    echo -e "----------------------------------------";
    echo -e "Hadoop Group : ${hadoopGroup}";
    echo -e "Hadoop Group User: ${hadoopGroupUser}";
    echo -e "Tar File Name: ${tarFileName}";
    echo -e "----------------------------------------";
}

#Prompt for the Hadoop tarball name, confirm it, then validate it.
#Recurses until the user confirms a name.
function f_inputTarFileName(){
    echo "Please enter the tar file name (Ex: hadoop-1.2.0.tar.gz)"
    f_readInput "\ttar file name" ${tarFileName}
    tarFileName=${REPLY}
    echo -e "\tPlease confirm the tar file name"
    echo -e "\t${e_bold}tar file name:${e_normal} $tarFileName"
    if f_confirm "\tIs the details correct" Y; then
        f_tarFileCheck $tarFileName
    else
        f_inputTarFileName
    fi
}

#Set Parameters
#Parse command-line flags:
#  -h  help (prints usage and exits)
#  -g  Hadoop group name   (overrides default from properties.txt)
#  -u  Hadoop user name    (overrides default from properties.txt)
#  -t  Hadoop tar file name
function f_input() {
    tarFileName=""
    while getopts ":hg:u:t:" option; do
        case "$option" in
            h) f_info; f_usage ;;
            g) hadoopGroup="$OPTARG" ;;
            u) hadoopGroupUser="$OPTARG" ;;
            t) tarFileName="$OPTARG" ;;
            ?) echo "Illegal option: $OPTARG"; f_usage ;;
        esac
    done
    #Normalize names to lowercase (Linux user/group convention).
    hadoopGroup=$(f_lowercase $hadoopGroup)
    hadoopGroupUser=$(f_lowercase $hadoopGroupUser)
}

#Validate the collected parameters, prompting where needed.
function f_inputCheck() {
    #Read tar file name if it was not supplied via -t
    if [ "z$tarFileName" = "z" ]; then
        f_inputTarFileName
    fi
    f_tarFileCheck $tarFileName
    f_groupExists ${hadoopGroup}
    f_userExists ${hadoopGroupUser}
}

###############################################
############## Check Status ###################
###############################################

#Create group $1; if it already exists, prompt for a new name and retry.
#NOTE(review): the $1 references below were lost in transcription and
#have been restored — verify against the original script.
function f_groupExists(){
    if [ ! -z "$(getent group $1)" ]; then
        #group does exist
        echo -e "Group already exists. Please enter a new name";
        f_readInput "\tGroup Name" $v_HadoopGroupTmp
        v_HadoopGroupTmp=${REPLY}
        f_groupExists ${v_HadoopGroupTmp}
    else
        echo -e "Creating Group $1"
        #group does NOT exist
        echo -e "\tPlease confirm the Group name"
        echo -e "\t${e_bold}Group name:${e_normal} $1"
        if f_confirm "\tIs the details correct" Y; then
            #add group command here
            hadoopGroup="$1"
            addgroup $1
            echo -e "Group Name '$1' is saved"
        else
            f_readInput "\tPlease enter new Group Name" $v_HadoopGroupTmp1
            v_HadoopGroupTmp1=${REPLY}
            f_groupExists ${v_HadoopGroupTmp1}
        fi
    fi
}

#Create user $1 inside ${hadoopGroup}; if the user exists, prompt and retry.
#NOTE(review): the $1 references below were restored (lost in transcription).
function f_userExists(){
    if [ ! -z "$(getent passwd $1)" ]; then
        #user does exist
        echo -e "User already exists. Please enter a new name";
        f_readInput "\tUser Name" $v_HadoopUserTmp
        v_HadoopUserTmp=${REPLY}
        f_userExists ${v_HadoopUserTmp}
    else
        echo -e "Creating user $1"
        #user does NOT exist
        echo -e "\tPlease confirm the user name"
        echo -e "\t${e_bold}user name:${e_normal} $1"
        if f_confirm "\tIs the details correct" Y; then
            #add user command here
            hadoopGroupUser="$1"
            adduser --ingroup ${hadoopGroup} $1
            echo -e "User Name '$1' is saved"
        else
            f_readInput "\tPlease enter new User Name" $v_HadoopUserTmp1
            v_HadoopUserTmp1=${REPLY}
            f_userExists ${v_HadoopUserTmp1}
        fi
    fi
}

###############################################
###############################################

#Check pre-requisites (JDK and ssh) and put $JAVA_HOME/bin on root's PATH.
function f_pre_install() {
    echo "Checking for Pre-Requisites"
    #Check whether JDK is correctly set up
    f_javaCheck
    #Check whether ssh is installed
    f_sshInstallCheck
    echo "# Setting PATH variable" >> ~/.bashrc
    echo "export PATH=$JAVA_HOME/bin:$PATH" >> ~/.bashrc
}

#Grant sudo to the Hadoop system user and create a passwordless SSH key.
function f_previlege() {
    echo -e "\nPre Requisites Installation Started...\n";
    #Give sudo privileges for Hadoop system user
    adduser ${hadoopGroupUser} sudo
    #Generate an SSH key for the Hadoop system user
    ${HADOOP_USER_PREFIX} "ssh-keygen -t rsa -P ''"
    #Enable SSH access to your local machine with this newly created key.
    ${HADOOP_USER_PREFIX} "cat /home/${hadoopGroupUser}/.ssh/id_rsa.pub >> /home/${hadoopGroupUser}/.ssh/authorized_keys"
}

#Connect to localhost --> ssh
#Verifies the key works: temporarily appends 'sleep 5; logout' to the
#Hadoop user's .bashrc so the test ssh session exits on its own, then
#restores the original .bashrc.
function f_sshLoginCheck(){
    echo -e "\n";
    ${HADOOP_USER_PREFIX} "cp /home/${hadoopGroupUser}/.bashrc /home/${hadoopGroupUser}/.bashrc.ssh.orig1"
    echo -e "Sleeping";
    ${HADOOP_USER_PREFIX} "echo 'sleep 5; logout' >> /home/${hadoopGroupUser}/.bashrc"
    ${HADOOP_USER_PREFIX} "ssh ${hadoopGroupUser}@localhost"
    ${HADOOP_USER_PREFIX} "cp -f /home/${hadoopGroupUser}/.bashrc.ssh.orig1 /home/${hadoopGroupUser}/.bashrc"
}

# Function to Disable IPv6 (Hadoop 1.x is known to misbehave with IPv6
# bound sockets on Ubuntu).
function f_disable_ipv6(){
    local sysctl_path="/etc/sysctl.conf"
    #Creating a back up of the file
    cp /etc/sysctl.conf /etc/sysctl.conf.orig
    echo "# disable ipv6" >> ${sysctl_path}
    echo "net.ipv6.conf.all.disable_ipv6 = 1" >> ${sysctl_path}
    echo "net.ipv6.conf.default.disable_ipv6 = 1" >> ${sysctl_path}
    echo "net.ipv6.conf.lo.disable_ipv6 = 1" >> ${sysctl_path}
    #Activate the change in the kernel without rebooting
    sysctl -p ${sysctl_path}
}

#Abort if IPv6 is still enabled (0 in the /proc flag means enabled).
function f_chk_disable_ipv6(){
    local chk_disable_ipv6=`cat /proc/sys/net/ipv6/conf/all/disable_ipv6`;
    if [ $chk_disable_ipv6 -eq 0 ]; then
        echo "Please disable IPv6"
        #NOTE(review): original exited with status 0 on this failure
        #path; changed to 1 so callers can detect the error.
        exit 1;
    fi
}

#Unpack the Hadoop tarball into /usr/local/hadoop and fix ownership.
function f_hadoop_install(){
    #Moving Hadoop package to /usr/local location
    cp ${tarFileName} /usr/local
    pushd /usr/local
    #Extract the contents of the Hadoop package
    tar xzf ${tarFileName}
    #Strip the ".tar.gz" suffix to get the extracted directory name.
    #NOTE(review): awk field $1 restored (lost in transcription).
    local tarFileBaseName_l=$(echo ${tarFileName} | awk -F ".tar.gz" '{ print $1 }')
    #Renaming
    mv ${tarFileBaseName_l} hadoop
    #Make sure to change the owner of all the files to the hduser user and hadoop group
    chown -R ${hadoopGroupUser}:${hadoopGroup} hadoop
    popd
}

#Function to Update $HOME/.bashrc of Hadoop System User.
#NOTE(review): the original built one long escaped string; rewritten as a
#here-doc for readability. $JAVA_HOME expands now (install time); \$-escaped
#variables are written literally so they expand when .bashrc is sourced.
function f_update_bashrc(){
    local HADOOP_USER_HOME="/home/${hadoopGroupUser}/.bashrc"
    cp ${HADOOP_USER_HOME} /home/${hadoopGroupUser}/.bashrc.orig2
    cat >> ${HADOOP_USER_HOME} <<EOF

# Set Hadoop-related environment variables
export HADOOP_HOME=/usr/local/hadoop

# Set JAVA_HOME (we will also configure JAVA_HOME directly for Hadoop later on)
export JAVA_HOME=$JAVA_HOME

# Some convenient aliases and functions for running Hadoop-related commands
unalias fs &> /dev/null
alias fs="hadoop fs"
unalias hls &> /dev/null
alias hls="fs -ls"

# If you have LZO compression enabled in your Hadoop cluster and
# compress job outputs with LZOP (not covered in this tutorial):
# Conveniently inspect an LZOP compressed file from the command
# line; run via:
#
# $ lzohead /hdfs/path/to/lzop/compressed/file.lzo
#
# Requires installed 'lzop' command.
#
lzohead () {
    hadoop fs -cat \$1 | lzop -dc | head -1000 | less
}

# Add Hadoop bin/ directory to PATH
export PATH=\$PATH:\$HADOOP_HOME/bin
EOF
}

#Function to edit hadoop-env: point Hadoop at the installed JDK.
function f_hadoop_env_config(){
    local hadoop_env_path="/usr/local/hadoop/conf/hadoop-env.sh"
    echo "# The java implementation to use. Required." >> ${hadoop_env_path}
    echo "export JAVA_HOME=$JAVA_HOME" >> ${hadoop_env_path}
}

#Create Hadoop's local temp/working directory and hand it to the Hadoop user.
function f_create_base_temp_dir(){
    mkdir -p /app/hadoop/tmp
    chown ${hadoopGroupUser}:${hadoopGroup} /app/hadoop/tmp
    chmod 777 /app/hadoop/tmp
}

#Function to edit core-site.xml.
#NOTE(review): the XML markup was lost in transcription; reconstructed
#from the surviving property names/values and the standard single-node
#layout — verify against the original.
function f_core_site(){
    local core_site_path="/usr/local/hadoop/conf/core-site.xml"
    cat > ${core_site_path} <<'EOF'
<?xml version="1.0"?>
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
    <description>A base for other temporary directories.</description>
  </property>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:54310</value>
    <description>The name of the default file system.
    A URI whose scheme and authority determine the FileSystem implementation.
    The uri scheme determines the config property (fs.SCHEME.impl) naming the FileSystem implementation class.
    The uri authority is used to determine the host, port, etc. for a filesystem.
    </description>
  </property>
</configuration>
EOF
}

#Function to edit mapred-site.xml (job tracker address).
#NOTE(review): XML markup reconstructed as in f_core_site; the original
#description's "locall" typo corrected to "local".
function f_mapred_site(){
    local mapred_site_path="/usr/local/hadoop/conf/mapred-site.xml"
    cat > ${mapred_site_path} <<'EOF'
<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:54311</value>
    <description>The host and port that the MapReduce job tracker runs at.
    If "local", then jobs are run in-process as a single map and reduce task.
    </description>
  </property>
</configuration>
EOF
}

#Function to edit hdfs-site.xml (replication factor 1 for a single node).
#NOTE(review): XML markup reconstructed as in f_core_site.
function f_hdfs_site(){
    local hdfs_site_path="/usr/local/hadoop/conf/hdfs-site.xml"
    cat > ${hdfs_site_path} <<'EOF'
<?xml version="1.0"?>
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Default block replication.
    The actual number of replications can be specified when the file is created.
    The default is used if replication is not specified in create time.
    </description>
  </property>
</configuration>
EOF
}

#Function to start Hadoop: format the namenode, start all daemons,
#list the running JVMs, then stop the daemons again (smoke test).
function f_start_hadoop(){
    su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/hadoop namenode -format"
    su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/start-all.sh"
    su - ${hadoopGroupUser} -c "$JAVA_HOME/bin/jps"
    su - ${hadoopGroupUser} -c "/usr/local/hadoop/bin/stop-all.sh"
}

#Top-level driver: runs every installation phase in order.
function main(){
    local scriptStartTime_l=`date +%s`

    #Initialization - import source files
    f_initialize
    # Check if Root user is executing the script
    f_rootUserCheck
    #Check for pre-installed softwares
    f_pre_install
    #Set Parameters ("$@" preserves arguments with spaces, unlike $*)
    f_input "$@"
    f_inputCheck
    f_updateVariables
    f_displayParams
    #Pre-Install
    f_previlege
    f_sshLoginCheck
    #Function to Disable IPv6
    f_disable_ipv6
    f_chk_disable_ipv6
    #Hadoop Installation
    f_hadoop_install
    #Function to Update $HOME/.bashrc
    f_update_bashrc
    #Configuration
    f_hadoop_env_config
    f_create_base_temp_dir
    f_core_site
    f_mapred_site
    f_hdfs_site
    #Function to run Hadoop (Start and Stop)
    f_start_hadoop

    local scriptEndTime_l=`date +%s`
    #Elapsed wall-clock seconds (arithmetic expansion; no need for bc).
    local scriptTotalTime_l=$((scriptEndTime_l - scriptStartTime_l))
    echo -e "Hadoop Installation Successful. (Total Time Taken is $scriptTotalTime_l seconds )"
    echo -e "-----End of the Script-----";
    popd >> /dev/null 2>&1
}
main "$@"
#Filename(Supporting File): commonFunctions.sh
#!/bin/bash
#Helper-function library sourced by HadoopSetUp.sh.

# Request POSIX.2-1992 behavior from utilities that honor this variable.
export _POSIX2_VERSION=199209

# Function to convert $1 to uppercase.
# NOTE(review): the echo argument was lost in transcription; restored.
function f_uppercase(){
    echo $1 | tr '[a-z]' '[A-Z]'
}

# Function to convert $1 to lowercase.
function f_lowercase(){
    echo $1 | tr '[A-Z]' '[a-z]'
}

#Function to exit from the program, printing $1 as the reason.
function f_die(){
    echo -e "$1";
    exit 1
}

# Function to check if Root user is executing the script; dies otherwise.
function f_rootUserCheck(){
    echo -e "Checking whether root user is executing the script";
    local name_l=`id -un`
    local uname_l=`f_uppercase $name_l`
    if [ "$uname_l" != "ROOT" ]; then
        f_die "Only user 'root' should execute this script."
    fi
}

#Validate that tar file $1 exists and is readable; re-prompt if missing.
#NOTE(review): $1 restored (lost in transcription). The original branch
#order made the read-permission check unreachable (the plain -f branch
#matched first); checks reordered to existence -> readability -> accept.
function f_tarFileCheck(){
    local v_fileName_l="$1"
    echo -e "\tChecking file $v_fileName_l"
    if [ ! -f "${v_fileName_l}" ]; then
        echo -e "\t$v_fileName_l : File does not exist"
        f_inputTarFileName
    elif [ ! -r "$v_fileName_l" ]; then
        f_die "\t$v_fileName_l : File does not have read permission"
    else
        echo -e "\tNew tar file name will be saved"
    fi
}

#########################################################
############## Check for Installation ###################
#########################################################

#Function to check if ssh is installed; dies if not found at /usr/bin/ssh.
function f_sshInstallCheck(){
    echo -e "Cheking for ssh installtion";
    v_ssh=`which ssh`
    if [ "$v_ssh" = "/usr/bin/ssh" ]; then
        echo "ssh is installed";
    else
        f_die "Please install ssh";
    fi
}

# Function to check if the system has been upgraded to work with JDK1.6.0 and above
function f_javaCheck(){
    echo -e "Checking for JAVA 1.6.0 or higher";
    local msgInstall="Please install java 1.6.0 or higher before running this script."
    local msgUpgrade="JAVA has to be upgraded to version 1.6.0 or higher before running this script."
    javaInstalled=`which java 2>&1`
    if [ "$?" -ne 0 ]; then
        f_die "$msgInstall"
    fi
    javaFullVerInfo=`java -fullversion 2>&1`
    if [ "$?" -ne 0 ]; then
        f_die "$msgInstall"
    fi
    #e.g. `java full version "1.6.0_45-b06"` -> take the quoted token.
    #NOTE(review): awk field lost in transcription; $4 assumed - verify.
    javaFullVerInfo=`java -fullversion 2>&1 | awk '{print $4}' 2>&1`
    #Strip quotes and normalize '-'/'_' to '.' so the version splits on dots.
    javaVerInfo=`echo $javaFullVerInfo | sed -e 's/"/''/g' -e 's/-/./g' -e 's/_/./g'`
    if [ "$javaVerInfo" = "" ]; then
        f_die "$msgInstall"
    fi
    #NOTE(review): dot-separated fields $1/$2/$3 restored - verify.
    javaSubVer1=`echo $javaVerInfo | awk -F. '{print $1}'`
    javaSubVer2=`echo $javaVerInfo | awk -F. '{print $2}'`
    javaSubVer3=`echo $javaVerInfo | awk -F. '{print $3}'`
    if [ ! $javaSubVer1 -ge 1 -o ! $javaSubVer2 -ge 6 -o ! $javaSubVer3 -ge 0 ]; then
        f_die "$msgUpgrade"
    fi
    echo -e "Found java $javaVerInfo installed on this system"
    if [ "$JAVA_HOME" = "" ]; then
        f_die "Please set JAVA_HOME Environment variable before starting this script"
    fi
    #A JDK (not just a JRE) is required: javac must live under $JAVA_HOME/bin.
    local javacFlag_l=`find $JAVA_HOME/bin -name 'javac'`
    local javacFlag_l=`basename ${javacFlag_l}`
    if [ "$javacFlag_l" != "javac" ]; then
        f_die "Please check whether java jdk is installed properly. We are unable to find javac executable file. It seems like you have installed only JRE. Also check whether JAVA_HOME environmetn variable is set properly"
    fi
}

##########################################
############## General ###################
##########################################

#Declare ANSI escape sequences used for colored/bold terminal output.
function f_decEscapeCharacters(){
    # Escape characters
    e_bold="\033[1m"
    e_underline="\033[4m"
    e_red="\033[0;31m"
    e_green="\033[0;32m"
    e_blue="\033[0;34m"
    e_normal="\033[0m"
    e_success="${e_bold}[${e_normal} ${e_green}OK${e_normal} ${e_bold}]${e_normal}"
    e_failure="${e_bold}[${e_normal} ${e_red}Failed${e_normal} ${e_bold}]${e_normal}"
}

#Print the banner describing what the installer does.
function f_info(){
    echo -e "========================================================================================================================"
    echo -e "\t\t\t${e_bold}Hadoop Installer${e_normal}"
    echo -e "\tThis script will install Hadoop and setup single node cluster."
    echo -e "\t${e_red}Only root user should execute this script${e_normal}"
    echo -e "========================================================================================================================"
}

#Print usage help and exit.
#NOTE(review): basename argument restored to "$0" (lost in transcription).
function f_usage(){
    local scriptName=$(basename "$0")
    echo -e "${e_bold}USAGE${e_normal}"
    echo -e "\t$scriptName [-h] [-g ${e_underline}HadoopGroup${e_normal}] [-u ${e_underline}HadoopUser${e_normal}] -t ${e_underline}tarFileName${e_normal}"
    echo -e "${e_bold}OPTIONS${e_normal}"
    echo -e "\t${e_bold}-h${e_normal}\t\t\tHelp - Flag used to display 'usage help' for the script"
    echo -e "\n\t${e_bold}-g ${e_underline}HadoopGroup${e_normal}\t\tHadoop Group Name - This group will be created (if not exists)"
    echo -e "\t\t\t\t If HadoopGroup is not specified it defaults to ${e_bold}'hadoop'${e_normal}"
    echo -e "\n\t${e_bold}-u ${e_underline}HadoopUser${e_normal}\t\tHadoop User Name - This user will be created (if not exists)"
    echo -e "\t\t\t\t If HadoopUser is not specified it defaults to ${e_bold}'hduser'${e_normal}"
    echo -e "\n\t${e_bold}-t ${e_underline}tarFileName${e_normal}\t\tHadoop Tar File Name - This tar file will be used"
    echo -e "\t\t\t\tto set-up Hadoop environment. If tarFileName is not specified, then"
    echo -e "\t\t\t\tyou will be prompted to specify during the execution of the script"
    echo -e "${e_bold}AUTHOR${e_normal}"
    echo -e "\t ${e_green}${e_bold}Puneetha B M${e_normal} - puneethabm@gmail.com \n"
    exit 0;
}

##########################################################
############## Input Related Functions ###################
##########################################################

# Read Input using prompt: $1 is the prompt message, $2 an optional
# default used when the user presses Enter. Result is left in REPLY.
# NOTE(review): $1/$2 restored (lost in transcription).
function f_readInput(){
    local v_promptMsg="$1"
    local v_defaultVal="${2:-}"
    echo -e -n "${v_promptMsg} ${e_bold}[${e_normal} ${e_blue}${e_bold}${v_defaultVal}${e_normal} ${e_bold}]${e_normal} "
    read REPLY
    if [ -z "${REPLY}" ]; then
        REPLY=${v_defaultVal}
    fi
}

# Y/N Prompt: $1 is the question, $2 the default ("Y" or "N").
# Returns 0 for yes, 1 for no.
function f_confirm() {
    local v_promptMsg="$1"
    local v_defaultVal="${2:-}"
    if [ "${v_defaultVal}" = "Y" ]; then
        v_defaultPrompt="${e_blue}${e_bold}Y${e_normal}/n"
    elif [ "${v_defaultVal}" = "N" ]; then
        v_defaultPrompt="y/${e_blue}${e_bold}N${e_normal}"
    else
        v_defaultPrompt="y/n"
    fi
    echo -e -n "${v_promptMsg} ${e_bold}[${e_normal} ${v_defaultPrompt} ${e_bold}]${e_normal} "
    read REPLY
    if [ -z "${REPLY}" ]; then
        REPLY=${v_defaultVal}
    fi
    case "$REPLY" in
        Y*|y*) return 0 ;;
        N*|n*) return 1 ;;
    esac
}

##########################################################
##########################################################
#Filename: properties.txt
# Default Hadoop group and system-user names.
# Can be overridden at run time with the -g / -u flags of HadoopSetUp.sh.
hadoopGroup="hadoop"
hadoopGroupUser="hduser"