
@bartekdobija
Last active October 30, 2015 14:52

Revisions

  1. bartekdobija revised this gist Oct 30, 2015. 1 changed file with 44 additions and 14 deletions.
    58 changes: 44 additions & 14 deletions Vagrantfile
    @@ -153,7 +153,7 @@ $cloudera_deps = <<SCRIPT
    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop \
    hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode \
    hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc \
    hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
    hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \
    @@ -165,19 +165,19 @@ $cloudera_deps = <<SCRIPT
    <configuration>
    <property>
    <name>mapred.job.tracker</name>
    <value>localhost:8021</value>
    <value>cdh.instance.com:8021</value>
    </property>
    <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
    <value>cdh.instance.com:10020</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
    <value>cdh.instance.com:19888</value>
    </property>
    <property>
    <name>mapreduce.task.tmp.dir</name>
    @@ -191,6 +191,10 @@ $cloudera_deps = <<SCRIPT
    <name>mapreduce.reduce.memory.mb</name>
    <value>512</value>
    </property>
    <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/user</value>
    </property>
    </configuration>
    HDPCNF
    @@ -292,6 +296,10 @@ YRNCNF
    <name>dfs.namenode.rpc-bind-host</name>
    <value>0.0.0.0</value>
    </property>
    <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
    </property>
    </configuration>
    HDFSCNF
    @@ -373,16 +381,20 @@ HIVECNF
    service hadoop-yarn-nodemanager start
    fi
    if [ ! "$(ps aux | grep historyserver | wc -l)" == "2" ]; then
    service hadoop-mapreduce-historyserver start
    fi
    echo "Creating HDFS directory structure" \
    && sudo -u hdfs hdfs dfs -mkdir -p {/user/{spark,hive/warehouse,oozie/share/lib},/tmp,/jobs,/var/log/hadoop-yarn} \
    && sudo -u hdfs hdfs dfs -chmod -R 777 / \
    && sudo -u hdfs hdfs dfs -mkdir -p {/user/{hadoop_oozie,spark,hive/warehouse,oozie/share/lib},/tmp,/jobs,/var/log/hadoop-yarn,/user/history} \
    && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
    && sudo -u hdfs hdfs dfs -chown -R mapred:hadoop /user/history \
    && sudo -u hdfs hdfs dfs -chmod -R 1777 /user/history \
    && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie \
    && sudo -u hdfs hdfs dfs -chown -R yarn:yarn /var/log/hadoop-yarn
    && sudo -u hdfs hdfs dfs -chown -R hadoop_oozie:hadoop_oozie /user/hadoop_oozie \
    && sudo -u hdfs hdfs dfs -chown -R yarn:mapred /var/log/hadoop-yarn \
    && sudo -u hdfs hdfs dfs -chmod -R 1777 /
    # history server must start after hdfs privileges have been fixed
    if [ ! "$(ps aux | grep historyserver | wc -l)" == "2" ]; then
    service hadoop-mapreduce-historyserver start
    fi
    # start Hive processses
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
    @@ -450,10 +462,20 @@ HIVECNF
    <name>oozie.use.system.libpath</name>
    <value>true</value>
    </property>
    <property>
    <name>oozie.credentials.credentialclasses</name>
    <value>
    hcat=com.github.bartekdobija.oozieutils.creds.TestCreds,
    hive=com.github.bartekdobija.oozieutils.creds.TestCreds,
    hbase=com.github.bartekdobija.oozieutils.creds.TestCreds
    </value>
    </property>
    </configuration>
    OOZCNF
    OOZIE_UTILS=https://github.com/bartekdobija/oozie-utils/releases/download/0.7/oozieutils-0.7.jar
    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then
    @@ -467,7 +489,9 @@ OOZCNF
    && chown -R oozie:oozie /var/log/oozie \
    && sudo -u oozie /usr/lib/oozie/bin/oozie-setup.sh sharelib create \
    -fs hdfs://localhost/user/oozie/share/lib/ \
    -locallib /usr/lib/oozie/oozie-sharelib
    -locallib /usr/lib/oozie/oozie-sharelib \
    && rm -fR /usr/lib/oozie/libserver/oozieutils* \
    && wget ${OOZIE_UTILS} -q -P /usr/lib/oozie/libserver/
    fi
    echo "registering Spark configuration in Oozie" \
    @@ -546,20 +570,26 @@ $couchbase_deps= <<SCRIPT
    && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
    --bucket=user_profile_versions \
    --bucket-type=couchbase \
    --bucket-ramsize=150 \
    --bucket-ramsize=100 \
    --bucket-replica=1 \
    --bucket-priority=high \
    --bucket-password=couchbase \
    -u couchbase \
    -p couchbase \
    --bucket-password=couchbase \
    && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
    --bucket=user_profile \
    --bucket-type=couchbase \
    --bucket-ramsize=150 \
    --bucket-ramsize=100 \
    --bucket-replica=1 \
    --bucket-priority=high \
    --bucket-password=couchbase \
    -u couchbase \
    -p couchbase
    echo "couchbase" > /tmp/.couchbaseCreds \
    && sudo -u hadoop_oozie hdfs dfs -copyFromLocal -f /tmp/.couchbaseCreds /user/hadoop_oozie/
    fi
    SCRIPT
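    A note on the process checks used throughout these scripts: the
    ps aux | grep historyserver | wc -l test expects exactly "2" because the
    grep itself appears in the process listing, so two lines mean one real
    daemon plus grep. A less fragile equivalent guard, sketched here with
    pgrep (an editorial suggestion, not part of the gist):

        # start the history server only if no matching process exists;
        # pgrep -f matches the full command line and never counts itself
        if ! pgrep -f historyserver > /dev/null; then
          service hadoop-mapreduce-historyserver start
        fi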
  2. bartekdobija revised this gist Oct 29, 2015. 1 changed file with 175 additions and 54 deletions.
    229 changes: 175 additions & 54 deletions Vagrantfile
    @@ -13,49 +13,17 @@ $anaconda_deps = <<SCRIPT
    SCRIPT

    # MySQL dependencies
    $mysql_deps = <<SCRIPT
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    DEV_PASSWORD=hadoop
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    echo "default_storage_engine = innodb" >> ${MY_CNF}
    echo "innodb_file_per_table = 1" >> ${MY_CNF}
    echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
    echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
    echo "innodb_buffer_pool_size = 1G" >> ${MY_CNF}
    echo "innodb_thread_concurrency = 8" >> ${MY_CNF}
    echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
    echo "innodb_log_file_size = 512M" >> ${MY_CNF}
    echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'" \
    && mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists oozie; grant all on oozie.* to 'oozie'@'localhost' identified by 'oozie'"
    fi
    SCRIPT

    # Spark dependencies
    $spark_deps = <<SCRIPT
    SPARK_VER=spark-1.5.0-bin-without-hadoop
    SPARK_BIN=http://stash.ryanair.com:7990/projects/BI/repos/spark-cdh-vagrant/browse/spark/${SPARK_VER}.tgz?raw
    SPARK_OUT=${SPARK_VER}.tgz
    SPARK_LINK=/opt/spark
    [ ! -e ${SPARK_LINK} ] \
    && echo "Spark installation..." \
    && tar zxf /vagrant/spark/${SPARK_VER}.tgz -C /opt/ \
    && wget ${SPARK_BIN} -q -O /tmp/${SPARK_OUT} \
    && tar zxf /vagrant/spark/${SPARK_OUT} -C /opt/ \
    && ln -s /opt/${SPARK_VER} ${SPARK_LINK}
    [ ! -e ${SPARK_LINK} ] && echo "Spark installation has failed!" && exit 1
    @@ -139,6 +107,41 @@ HIVECNF
    SCRIPT

    # MySQL dependencies
    $mysql_deps = <<SCRIPT
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    DEV_PASSWORD=hadoop
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    echo "default_storage_engine = innodb" >> ${MY_CNF}
    echo "innodb_file_per_table = 1" >> ${MY_CNF}
    echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
    echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
    echo "innodb_buffer_pool_size = 1G" >> ${MY_CNF}
    echo "innodb_thread_concurrency = 8" >> ${MY_CNF}
    echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
    echo "innodb_log_file_size = 512M" >> ${MY_CNF}
    echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'" \
    && mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists oozie; grant all on oozie.* to 'oozie'@'localhost' identified by 'oozie'"
    fi
    SCRIPT

    # Cloudera CDH dependencies
    $cloudera_deps = <<SCRIPT
    @@ -155,7 +158,7 @@ $cloudera_deps = <<SCRIPT
    hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
    hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \
    oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
    hive-jdbc avro-libs pig kite impala*
    hive-jdbc avro-libs pig kite impala* openssl-devel openssl
    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml
    @@ -238,6 +241,61 @@ HDPCNF
    YRNCNF
    cat << HDFSCNF > /etc/hadoop/conf/hdfs-site.xml
    <configuration>
    <property>
    <name>dfs.replication</name>
    <value>1</value>
    </property>
    <property>
    <name>dfs.safemode.extension</name>
    <value>0</value>
    </property>
    <property>
    <name>dfs.safemode.min.datanodes</name>
    <value>1</value>
    </property>
    <property>
    <name>hadoop.tmp.dir</name>
    <value>/var/lib/hadoop-hdfs/cache/\\${user.name}</value>
    </property>
    <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/name</value>
    </property>
    <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/namesecondary</value>
    </property>
    <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/data</value>
    </property>
    <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
    </property>
    <property>
    <name>dfs.client.file-block-storage-locations.timeout.millis</name>
    <value>10000</value>
    </property>
    <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/run/hadoop-hdfs/dn._PORT</value>
    </property>
    <property>
    <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
    <value>true</value>
    </property>
    <property>
    <name>dfs.namenode.rpc-bind-host</name>
    <value>0.0.0.0</value>
    </property>
    </configuration>
    HDFSCNF
    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
    echo "Formatting HDFS..." \
    @@ -293,6 +351,7 @@ HIVECNF
    && chkconfig hadoop-hdfs-datanode on \
    && chkconfig hadoop-yarn-resourcemanager on \
    && chkconfig hadoop-yarn-nodemanager on \
    && chkconfig hadoop-mapreduce-historyserver on \
    && chkconfig hive-metastore on \
    && chkconfig hive-server2 on \
    && chkconfig oozie on
    @@ -314,18 +373,16 @@ HIVECNF
    service hadoop-yarn-nodemanager start
    fi
    if [ ! "$(ps aux | grep historyserver | wc -l)" == "2" ]; then
    service hadoop-mapreduce-historyserver start
    fi
    echo "Creating HDFS directory structure" \
    &&sudo -u hdfs hdfs dfs -mkdir -p /user \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/spark \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/spark \
    && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -mkdir -p {/user/{spark,hive/warehouse,oozie/share/lib},/tmp,/jobs,/var/log/hadoop-yarn} \
    && sudo -u hdfs hdfs dfs -chmod -R 777 / \
    && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/oozie/share/lib \
    && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie
    && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie \
    && sudo -u hdfs hdfs dfs -chown -R yarn:yarn /var/log/hadoop-yarn
    # start Hive processses
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
    @@ -381,10 +438,6 @@ HIVECNF
    <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
    <value>*</value>
    </property>
    <property>
    <name>oozie.service.WorkflowAppService.system.libpath</name>
    <value>/usr/lib/oozie/oozie-sharelib</value>
    </property>
    <property>
    <name>use.system.libpath.for.mapreduce.and.pig.jobs</name>
    <value>true</value>
    @@ -393,16 +446,28 @@ HIVECNF
    <name>oozie.service.PurgeService.purge.old.coord.action</name>
    <value>true</value>
    </property>
    <property>
    <name>oozie.use.system.libpath</name>
    <value>true</value>
    </property>
    </configuration>
    OOZCNF
    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then
    mkdir -p /user/oozie/share/lib \
    && chown -R oozie:oozie /user/oozie \
    && rm -fR /etc/oozie/conf/hadoop-conf \
    && ln -s /etc/hadoop/conf /etc/oozie/conf/hadoop-conf
    echo "Creating Oozie database" \
    && /usr/lib/oozie/bin/ooziedb.sh create -run \
    && mkdir -p /opt/sharelib \
    && /usr/lib/oozie/bin/oozie-setup.sh sharelib create -fs /opt/sharelib -locallib /usr/lib/oozie/oozie-sharelib
    && chown -R oozie:oozie /var/log/oozie \
    && sudo -u oozie /usr/lib/oozie/bin/oozie-setup.sh sharelib create \
    -fs hdfs://localhost/user/oozie/share/lib/ \
    -locallib /usr/lib/oozie/oozie-sharelib
    fi
    echo "registering Spark configuration in Oozie" \
    @@ -412,11 +477,25 @@ OOZCNF
    service oozie start
    fi
    echo "export OOZIE_URL=http://localhost:11000/oozie" > /etc/profile.d/oozie.sh
    SCRIPT

    # OS configuration
    $system_config = <<SCRIPT
    # disable IPv6
    if [ "$(grep disable_ipv6 /etc/sysctl.conf | wc -l)" == "0" ]; then
    echo "net.ipv6.conf.all.disable_ipv6=1" >> /etc/sysctl.conf \
    && sysctl -f /etc/sysctl.conf
    fi
    # this should be a persistent config
    ulimit -n 65536
    ulimit -s 10240
    ulimit -c unlimited
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    PROXY_CONFIG=/etc/profile.d/proxy.sh
    @@ -442,6 +521,47 @@ $system_config = <<SCRIPT
    && echo "${DEV_USER} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/hadoop_oozie
    fi
    if [ "$(grep vm.swappiness /etc/sysctl.conf | wc -l)" == "0" ]; then
    echo "vm.swappiness=0" >> /etc/sysctl.conf && sysctl vm.swappiness=0
    fi
    SCRIPT

    # Couchbase dependencies
    $couchbase_deps= <<SCRIPT
    COUCHBASE_VER=couchbase-server-community-3.0.1-centos6.x86_64.rpm
    COUCHBASE_LINK=/opt/couchbase
    if [ ! -e ${COUCHBASE_LINK} ]; then
    wget http://packages.couchbase.com/releases/3.0.1/${COUCHBASE_VER} -q -P /tmp/ \
    && rpm -iv /tmp/${COUCHBASE_VER} \
    && chkconfig couchbase-server on \
    && sleep 20 \
    && /opt/couchbase/bin/couchbase-cli cluster-init \
    -c localhost \
    -u couchbase \
    -p couchbase \
    --cluster-ramsize=350 \
    && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
    --bucket=user_profile_versions \
    --bucket-type=couchbase \
    --bucket-ramsize=150 \
    --bucket-replica=1 \
    --bucket-priority=high \
    -u couchbase \
    -p couchbase \
    && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
    --bucket=user_profile \
    --bucket-type=couchbase \
    --bucket-ramsize=150 \
    --bucket-replica=1 \
    --bucket-priority=high \
    -u couchbase \
    -p couchbase
    fi
    SCRIPT

    $information = <<SCRIPT
    @@ -463,7 +583,7 @@ Vagrant.configure(2) do |config|
    config.vm.network :public_network, :mac => "0800DEADBEEF"

    config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
    vb.name = "dev-hadoop-env"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"]
    @@ -475,6 +595,7 @@ Vagrant.configure(2) do |config|
    config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
    config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
    config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
    config.vm.provision :shell, :name => "couchbase_deps", :inline => $couchbase_deps
    config.vm.provision :shell, :name => "information", :inline => $information

    end
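    The hdfs-site.xml introduced in this revision enables HDFS short-circuit
    reads through a Unix domain socket (dfs.client.read.shortcircuit and
    dfs.domain.socket.path). For the DataNode to create that socket, the
    parent directory must exist; a minimal preparation sketch (the ownership
    and mode below are assumptions based on common CDH packaging, not taken
    from the gist):

        # create the directory holding the DataNode domain socket
        # referenced by dfs.domain.socket.path (/var/run/hadoop-hdfs/dn._PORT)
        mkdir -p /var/run/hadoop-hdfs
        chown hdfs:hadoop /var/run/hadoop-hdfs  # assumed CDH user/group
        chmod 755 /var/run/hadoop-hdfs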
  3. bartekdobija revised this gist Oct 20, 2015. 1 changed file with 165 additions and 86 deletions.
    251 changes: 165 additions & 86 deletions Vagrantfile
    @@ -22,7 +22,7 @@ $mysql_deps = <<SCRIPT
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    @@ -39,25 +39,26 @@ $mysql_deps = <<SCRIPT
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'"
    -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'" \
    && mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists oozie; grant all on oozie.* to 'oozie'@'localhost' identified by 'oozie'"
    fi
    SCRIPT

    # Spark dependencies
    $spark_deps = <<SCRIPT
    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    SPARK_VER=spark-1.5.0-bin-without-hadoop
    SPARK_LINK=/opt/spark
    [ ! -e ${SPARK_LINK} ] \
    && echo "Spark installation..." \
    && echo "downloading binaries" \
    && wget http://ftp.heanet.ie/mirrors/www.apache.org/dist/spark/spark-1.5.1/${SPARK_TGZ} -q -P /opt/ \
    && tar zxf /opt/${SPARK_TGZ} -C /opt/ \
    && ln -s /opt/spark-1.5.1-bin-without-hadoop ${SPARK_LINK}
    && tar zxf /vagrant/spark/${SPARK_VER}.tgz -C /opt/ \
    && ln -s /opt/${SPARK_VER} ${SPARK_LINK}
    [ ! -e /opt/${SPARK_TGZ} ] && exit 1
    [ ! -e ${SPARK_LINK} ] && echo "Spark installation has failed!" && exit 1
    echo "Spark configuration..."
    echo "configuring /etc/profile.d/spark.sh"
    @@ -159,85 +160,80 @@ $cloudera_deps = <<SCRIPT
    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml
    <configuration>
    <property>
    <name>mapred.job.tracker</name>
    <value>localhost:8021</value>
    </property>
    <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
    </property>
    <property>
    <description>To set the value of tmp directory for map and reduce tasks.</description>
    <name>mapreduce.task.tmp.dir</name>
    <value>/var/lib/hadoop-mapreduce/cache/\\${user.name}/tasks</value>
    </property>
    <property>
    <name>mapreduce.map.memory.mb</name>
    <value>512</value>
    </property>
    <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>512</value>
    </property>
    <property>
    <name>mapred.job.tracker</name>
    <value>localhost:8021</value>
    </property>
    <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
    </property>
    <property>
    <name>mapreduce.task.tmp.dir</name>
    <value>/var/lib/hadoop-mapreduce/cache/\\${user.name}/tasks</value>
    </property>
    <property>
    <name>mapreduce.map.memory.mb</name>
    <value>512</value>
    </property>
    <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>512</value>
    </property>
    </configuration>
    HDPCNF
    cat << YRNCNF > /etc/hadoop/conf/yarn-site.xml
    <configuration>
    <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
    </property>
    <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    </property>
    <property>
    <name>yarn.dispatcher.exit-on-error</name>
    <value>true</value>
    </property>
    <property>
    <description>List of directories to store localized files in.</description>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/var/lib/hadoop-yarn/cache/\\${user.name}/nm-local-dir</value>
    </property>
    <property>
    <description>Where to store container logs.</description>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/var/log/hadoop-yarn/containers</value>
    </property>
    <property>
    <description>Where to aggregate logs to.</description>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/var/log/hadoop-yarn/apps</value>
    </property>
    <property>
    <description>Classpath for typical applications.</description>
    <name>yarn.application.classpath</name>
    <value>\\$HADOOP_CONF_DIR,\\$HADOOP_COMMON_HOME/*,\\$HADOOP_COMMON_HOME/lib/*,\\$HADOOP_HDFS_HOME/*,
    \\$HADOOP_HDFS_HOME/lib/*,\\$HADOOP_MAPRED_HOME/*,\\$HADOOP_MAPRED_HOME/lib/*,\\$HADOOP_YARN_HOME/*,
    \\$HADOOP_YARN_HOME/lib/*
    </value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
    </property>
    <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    </property>
    <property>
    <name>yarn.dispatcher.exit-on-error</name>
    <value>true</value>
    </property>
    <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/var/lib/hadoop-yarn/cache/\\${user.name}/nm-local-dir</value>
    </property>
    <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/var/log/hadoop-yarn/containers</value>
    </property>
    <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/var/log/hadoop-yarn/apps</value>
    </property>
    <property>
    <name>yarn.application.classpath</name>
    <value>\\$HADOOP_CONF_DIR,\\$HADOOP_COMMON_HOME/*,\\$HADOOP_COMMON_HOME/lib/*,\\$HADOOP_HDFS_HOME/*,
    \\$HADOOP_HDFS_HOME/lib/*,\\$HADOOP_MAPRED_HOME/*,\\$HADOOP_MAPRED_HOME/lib/*,\\$HADOOP_YARN_HOME/*,
    \\$HADOOP_YARN_HOME/lib/*
    </value>
    </property>
    </configuration>
    YRNCNF
    @@ -256,8 +252,10 @@ YRNCNF
    && wget ${MYSQL_JDBC_SOURCE} -q -P /tmp/ \
    && echo "Installing MySQL JDBC drivers" \
    && tar zxf /tmp/${MYSQL_JDBC}.tar.gz -C /tmp/ \
    && mkdir -p /usr/lib/oozie/libext \
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/hive/lib/ \
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/local/lib/jdbc/mysql/
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/local/lib/jdbc/mysql/ \
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/oozie/libext/
    cat << HIVECNF > /etc/hive/conf/hive-site.xml
    @@ -299,7 +297,7 @@ HIVECNF
    && chkconfig hive-server2 on \
    && chkconfig oozie on
    # start hadoop processes
    # start Hadoop processses
    if [ ! "$(ps aux | grep hdfs-namenode | wc -l)" == "2" ]; then
    service hadoop-hdfs-namenode start
    fi
    @@ -324,9 +322,12 @@ HIVECNF
    && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse
    && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/oozie/share/lib \
    && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie
    # start Hive processses
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
    service hive-metastore start
    fi
    @@ -335,6 +336,82 @@ HIVECNF
    service hive-server2 start
    fi
    # Oozie configuration
    echo "Deploying oozie-site.xml"
    cat << OOZCNF > /etc/oozie/conf/oozie-site.xml
    <configuration>
    <property>
    <name>oozie.service.JPAService.create.db.schema</name>
    <value>true</value>
    </property>
    <property>
    <name>oozie.service.JPAService.validate.db.connection</name>
    <value>true</value>
    </property>
    <property>
    <name>oozie.service.JPAService.jdbc.driver</name>
    <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
    <name>oozie.service.JPAService.jdbc.url</name>
    <value>jdbc:mysql://localhost:3306/oozie?createDatabaseIfNotExist=true</value>
    </property>
    <property>
    <name>oozie.service.JPAService.jdbc.username</name>
    <value>oozie</value>
    </property>
    <property>
    <name>oozie.service.JPAService.jdbc.password</name>
    <value>oozie</value>
    </property>
    <property>
    <name>oozie.service.ProxyUserService.proxyuser.oozie.hosts</name>
    <value>*</value>
    </property>
    <property>
    <name>oozie.service.ProxyUserService.proxyuser.oozie.groups</name>
    <value>*</value>
    </property>
    <property>
    <name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
    <value>*</value>
    </property>
    <property>
    <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
    <value>*</value>
    </property>
    <property>
    <name>oozie.service.WorkflowAppService.system.libpath</name>
    <value>/usr/lib/oozie/oozie-sharelib</value>
    </property>
    <property>
    <name>use.system.libpath.for.mapreduce.and.pig.jobs</name>
    <value>true</value>
    </property>
    <property>
    <name>oozie.service.PurgeService.purge.old.coord.action</name>
    <value>true</value>
    </property>
    </configuration>
    OOZCNF
    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then
    echo "Creating Oozie database" \
    && /usr/lib/oozie/bin/ooziedb.sh create -run \
    && mkdir -p /opt/sharelib \
    && /usr/lib/oozie/bin/oozie-setup.sh sharelib create -fs /opt/sharelib -locallib /usr/lib/oozie/oozie-sharelib
    fi
    echo "registering Spark configuration in Oozie" \
    && ln -f -s /opt/spark/conf /etc/oozie/conf/spark-conf
    if [ ! "$(ps aux | grep oozie | wc -l)" == "2" ]; then
    service oozie start
    fi
    SCRIPT

    # OS configuration
    @@ -370,8 +447,10 @@ SCRIPT
    $information = <<SCRIPT
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    echo "Guest IP address: $ip"
    echo "Namenode's UI available at: http://$ip:50070"
    echo "Resource Manager's UI available at: http://$ip:8088"
    echo "Namenode UI available at: http://$ip:50070"
    echo "Resource Manager UI available at: http://$ip:8088"
    echo "Oozie endpoint available at: http://$ip:11000/oozie"
    echo "Spark 1.5 available under /opt/spark"
    echo "MySQL root password: hadoop"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
  4. bartekdobija revised this gist Oct 19, 2015. No changes.
  5. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions Vagrantfile
    @@ -122,7 +122,7 @@ SPCNF
    </property>
    <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
    <value>thrift://cdh.instance.com:9083</value>
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
    @@ -280,7 +280,7 @@ YRNCNF
    </property>
    <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
    <value>thrift://cdh.instance.com:9083</value>
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
  6. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 186 additions and 16 deletions.
    202 changes: 186 additions & 16 deletions Vagrantfile
    @@ -5,7 +5,7 @@ $anaconda_deps = <<SCRIPT
    if [ ! -d "/usr/local/anaconda" ]; then
    echo "Anaconda installation..." \
    && echo "getting binaries" \
    && echo "downloading binaries" \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && echo "running installer" \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    @@ -22,10 +22,9 @@ $mysql_deps = <<SCRIPT
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server mysql-connector-java
    yum install -y mysql-community-server
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    echo "default_storage_engine = innodb" >> ${MY_CNF}
    echo "innodb_file_per_table = 1" >> ${MY_CNF}
    @@ -36,12 +35,12 @@ $mysql_deps = <<SCRIPT
    echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
    echo "innodb_log_file_size = 512M" >> ${MY_CNF}
    echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    mysql -u root -p${DEV_PASSWORD} \
    -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'"
    fi
    SCRIPT
    @@ -51,9 +50,9 @@ $spark_deps = <<SCRIPT
    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    SPARK_LINK=/opt/spark
    [ ! -e ${SPARK_LINK} ] \
    && echo "Spark installation..." \
    && echo "downloading binaries" \
    && wget http://ftp.heanet.ie/mirrors/www.apache.org/dist/spark/spark-1.5.1/${SPARK_TGZ} -q -P /opt/ \
    && tar zxf /opt/${SPARK_TGZ} -C /opt/ \
    && ln -s /opt/spark-1.5.1-bin-without-hadoop ${SPARK_LINK}
    @@ -66,16 +65,13 @@ $spark_deps = <<SCRIPT
    echo "configuring /opt/spark/conf/spark-env.sh"
    cat << SPCNF > /opt/spark/conf/spark-env.sh
    HADOOP_CONF_DIR=/etc/hadoop/conf/
    SPARK_DIST_CLASSPATH=\\$(hadoop classpath)
    LD_LIBRARY_PATH=\\${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/
    SPCNF
    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.shuffle.service.enabled true
    # Execution Behavior
    spark.broadcast.blockSize 4096
    @@ -102,10 +98,43 @@ spark.executor.extraJavaOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTime
    spark.driver.extraJavaOptions -XX:+UseCompressedOops -XX:MaxPermSize=1g
    spark.executor.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    spark.driver.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    SPCNF
    echo "Add hive-site.xml configuration here !!!"
    echo "configuring ${SPARK_LINK}/conf/hive-site.xml"
    cat << HIVECNF > ${SPARK_LINK}/conf/hive-site.xml
    <configuration>
    <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs:///user/hive/warehouse</value>
    </property>
    </configuration>
    HIVECNF
    echo "installing resource scheduler" \
    && mkdir -p /usr/lib/hadoop-yarn/lib/ \
    && cp -f ${SPARK_LINK}/lib/spark-*-yarn-shuffle.jar /usr/lib/hadoop-yarn/lib/
    SCRIPT

    @@ -127,12 +156,140 @@ $cloudera_deps = <<SCRIPT
    oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
    hive-jdbc avro-libs pig kite impala*
    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml
    <configuration>
    <property>
    <name>mapred.job.tracker</name>
    <value>localhost:8021</value>
    </property>
    <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
    </property>
    <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
    </property>
    <property>
    <description>To set the value of tmp directory for map and reduce tasks.</description>
    <name>mapreduce.task.tmp.dir</name>
    <value>/var/lib/hadoop-mapreduce/cache/\\${user.name}/tasks</value>
    </property>
    <property>
    <name>mapreduce.map.memory.mb</name>
    <value>512</value>
    </property>
    <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>512</value>
    </property>
    </configuration>
    HDPCNF
    cat << YRNCNF > /etc/hadoop/conf/yarn-site.xml
    <configuration>
    <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle,spark_shuffle</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
    </property>
    <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    </property>
    <property>
    <name>yarn.dispatcher.exit-on-error</name>
    <value>true</value>
    </property>
    <property>
    <description>List of directories to store localized files in.</description>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/var/lib/hadoop-yarn/cache/\\${user.name}/nm-local-dir</value>
    </property>
    <property>
    <description>Where to store container logs.</description>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/var/log/hadoop-yarn/containers</value>
    </property>
    <property>
    <description>Where to aggregate logs to.</description>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/var/log/hadoop-yarn/apps</value>
    </property>
    <property>
    <description>Classpath for typical applications.</description>
    <name>yarn.application.classpath</name>
    <value>\\$HADOOP_CONF_DIR,\\$HADOOP_COMMON_HOME/*,\\$HADOOP_COMMON_HOME/lib/*,\\$HADOOP_HDFS_HOME/*,
    \\$HADOOP_HDFS_HOME/lib/*,\\$HADOOP_MAPRED_HOME/*,\\$HADOOP_MAPRED_HOME/lib/*,\\$HADOOP_YARN_HOME/*,
    \\$HADOOP_YARN_HOME/lib/*
    </value>
    </property>
    </configuration>
    YRNCNF
    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
    echo "Formatting HDFS..." \
    && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi
    MYSQL_JDBC=mysql-connector-java-5.1.37
    MYSQL_JDBC_SOURCE=http://dev.mysql.com/get/Downloads/Connector-J/${MYSQL_JDBC}.tar.gz
    mkdir -p /usr/local/lib/jdbc/mysql \
    && echo "Downloading MySQL JDBC drivers" \
    && wget ${MYSQL_JDBC_SOURCE} -q -P /tmp/ \
    && echo "Installing MySQL JDBC drivers" \
    && tar zxf /tmp/${MYSQL_JDBC}.tar.gz -C /tmp/ \
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/hive/lib/ \
    && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/local/lib/jdbc/mysql/
    cat << HIVECNF > /etc/hive/conf/hive-site.xml
    <configuration>
    <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    </property>
    <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
    </property>
    <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs:///user/hive/warehouse</value>
    </property>
    </configuration>
    HIVECNF
    # auto-start services
    chkconfig hadoop-hdfs-namenode on \
    && chkconfig hadoop-hdfs-datanode on \
    @@ -143,7 +300,7 @@ $cloudera_deps = <<SCRIPT
    && chkconfig oozie on
    # start hadoop processes
    if [ ! "$(ps aux | grep namenode | wc -l)" == "2" ]; then
    if [ ! "$(ps aux | grep hdfs-namenode | wc -l)" == "2" ]; then
    service hadoop-hdfs-namenode start
    fi
    @@ -162,8 +319,21 @@ $cloudera_deps = <<SCRIPT
    echo "Creating HDFS directory structure" \
    &&sudo -u hdfs hdfs dfs -mkdir -p /user \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/spark \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/spark \
    && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp \
    && sudo -u hdfs hdfs dfs -mkdir -p /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive/warehouse \
    && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
    service hive-metastore start
    fi
    if [ ! "$(ps aux | grep HiveServer2 | wc -l)" == "2" ]; then
    service hive-server2 start
    fi
    SCRIPT

    @@ -211,7 +381,7 @@ Vagrant.configure(2) do |config|

    config.vm.box = "boxcutter/centos66"
    config.vm.hostname = "cdh.instance.com"
    config.vm.network :public_network, :bridge => "en3: Thunderbolt Ethernet", :mac => "0800DEADBEEF"
    config.vm.network :public_network, :mac => "0800DEADBEEF"

    config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
  7. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion Vagrantfile
    @@ -22,7 +22,7 @@ $mysql_deps = <<SCRIPT
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server
    yum install -y mysql-community-server mysql-connector-java
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
  8. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions Vagrantfile
    @@ -27,6 +27,7 @@ $mysql_deps = <<SCRIPT
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    echo "default_storage_engine = innodb" >> ${MY_CNF}
    echo "innodb_file_per_table = 1" >> ${MY_CNF}
    echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
    echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
  9. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion Vagrantfile
    @@ -108,7 +108,7 @@ SPCNF
    SCRIPT

    # Cloduera CDH dependencies
    # Cloudera CDH dependencies
    $cloudera_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    @@ -201,6 +201,7 @@ $information = <<SCRIPT
    echo "Guest IP address: $ip"
    echo "Namenode's UI available at: http://$ip:50070"
    echo "Resource Manager's UI available at: http://$ip:8088"
    echo "MySQL root password: hadoop"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
    SCRIPT
  10. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 22 additions and 4 deletions.
    26 changes: 22 additions & 4 deletions Vagrantfile
    @@ -38,7 +38,7 @@ $mysql_deps = <<SCRIPT
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    fi
    @@ -118,11 +118,18 @@ $cloudera_deps = <<SCRIPT
    && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/
    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop \
    hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode \
    hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc \
    hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
    hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \
    oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
    hive-jdbc avro-libs pig kite impala*
    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
    echo "Formatting HDFS..." && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    echo "Formatting HDFS..." \
    && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi
    # auto-start services
    @@ -151,7 +158,8 @@ $cloudera_deps = <<SCRIPT
    service hadoop-yarn-nodemanager start
    fi
    sudo -u hdfs hdfs dfs -mkdir -p /user \
    echo "Creating HDFS directory structure" \
    &&sudo -u hdfs hdfs dfs -mkdir -p /user \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
    && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp
    @@ -188,6 +196,15 @@ $system_config = <<SCRIPT
    SCRIPT

    $information = <<SCRIPT
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    echo "Guest IP address: $ip"
    echo "Namenode's UI available at: http://$ip:50070"
    echo "Resource Manager's UI available at: http://$ip:8088"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
    SCRIPT

    Vagrant.configure(2) do |config|

    config.vm.box = "boxcutter/centos66"
    @@ -207,5 +224,6 @@ Vagrant.configure(2) do |config|
    config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
    config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
    config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
    config.vm.provision :shell, :name => "information", :inline => $information

    end
  11. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 5 additions and 1 deletion.
    6 changes: 5 additions & 1 deletion Vagrantfile
    @@ -75,7 +75,6 @@ SPCNF
    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-hadoop2.6.0.jar
    spark.shuffle.service.enabled true
    # Execution Behavior
    spark.broadcast.blockSize 4096
    @@ -152,6 +151,11 @@ $cloudera_deps = <<SCRIPT
    service hadoop-yarn-nodemanager start
    fi
    sudo -u hdfs hdfs dfs -mkdir -p /user \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
    && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
    && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp
    SCRIPT

    # OS configuration
  12. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 36 additions and 0 deletions.
    36 changes: 36 additions & 0 deletions Vagrantfile
    @@ -1,3 +1,4 @@
    # Anaconda dependencies
    $anaconda_deps = <<SCRIPT
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh
    @@ -12,6 +13,7 @@ $anaconda_deps = <<SCRIPT
    SCRIPT

    # MySQL dependencies
    $mysql_deps = <<SCRIPT
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    @@ -43,6 +45,7 @@ $mysql_deps = <<SCRIPT
    SCRIPT

    # Spark dependencies
    $spark_deps = <<SCRIPT
    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    @@ -106,6 +109,7 @@ SPCNF
    SCRIPT

    # Cloduera CDH dependencies
    $cloudera_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    @@ -117,8 +121,40 @@ $cloudera_deps = <<SCRIPT
    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
    echo "Formatting HDFS..." && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi
    # auto-start services
    chkconfig hadoop-hdfs-namenode on \
    && chkconfig hadoop-hdfs-datanode on \
    && chkconfig hadoop-yarn-resourcemanager on \
    && chkconfig hadoop-yarn-nodemanager on \
    && chkconfig hive-metastore on \
    && chkconfig hive-server2 on \
    && chkconfig oozie on
    # start hadoop processes
    if [ ! "$(ps aux | grep namenode | wc -l)" == "2" ]; then
    service hadoop-hdfs-namenode start
    fi
    if [ ! "$(ps aux | grep datanode | wc -l)" == "2" ]; then
    service hadoop-hdfs-datanode start
    fi
    if [ ! "$(ps aux | grep resourcemanager | wc -l)" == "2" ]; then
    service hadoop-yarn-resourcemanager start
    fi
    if [ ! "$(ps aux | grep nodemanager | wc -l)" == "2" ]; then
    service hadoop-yarn-nodemanager start
    fi
    SCRIPT

    # OS configuration
    $system_config = <<SCRIPT
    DEV_USER=hadoop_oozie
  13. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 13 additions and 3 deletions.
    16 changes: 13 additions & 3 deletions Vagrantfile
    @@ -4,7 +4,9 @@ $anaconda_deps = <<SCRIPT
    if [ ! -d "/usr/local/anaconda" ]; then
    echo "Anaconda installation..." \
    && echo "getting binaries" \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && echo "running installer" \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi
    @@ -54,12 +56,20 @@ $spark_deps = <<SCRIPT
    [ ! -e /opt/${SPARK_TGZ} ] && exit 1
    echo "Spark configuration..."
    echo "configuring /etc/profile.d/spark.sh"
    echo 'export PATH=$PATH'":${SPARK_LINK}/bin" > /etc/profile.d/spark.sh
    echo "HADOOP_CONF_DIR=/etc/hadoop/conf/" > /opt/spark/conf/spark-env.sh
    echo "SPARK_DIST_CLASSPATH=$(hadoop classpath)" >> /opt/spark/conf/spark-env.sh
    echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/" >> ${SPARK_LINK}/conf/spark-env.sh
    echo "configuring /opt/spark/conf/spark-env.sh"
    cat << SPCNF > /opt/spark/conf/spark-env.sh
    HADOOP_CONF_DIR=/etc/hadoop/conf/
    SPARK_DIST_CLASSPATH=\\$(hadoop classpath)
    LD_LIBRARY_PATH=\\${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/
    SPCNF
    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-hadoop2.6.0.jar
  14. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion Vagrantfile
    @@ -54,7 +54,7 @@ $spark_deps = <<SCRIPT
    [ ! -e /opt/${SPARK_TGZ} ] && exit 1
    echo "export PATH=\$PATH:${SPARK_LINK}/bin" > /etc/init.d/spark.sh
    echo 'export PATH=$PATH'":${SPARK_LINK}/bin" > /etc/profile.d/spark.sh
    echo "HADOOP_CONF_DIR=/etc/hadoop/conf/" > /opt/spark/conf/spark-env.sh
    echo "SPARK_DIST_CLASSPATH=$(hadoop classpath)" >> /opt/spark/conf/spark-env.sh
    @@ -124,6 +124,12 @@ $system_config = <<SCRIPT
    rm -fR ${PROXY_CONFIG}
    fi
    # Add entries to /etc/hosts
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    host=$(hostname)
    echo "127.0.0.1 localhost" > /etc/hosts
    echo "$ip $host" >> /etc/hosts
    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
    echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
  15. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 9 additions and 4 deletions.
    13 changes: 9 additions & 4 deletions Vagrantfile
    @@ -14,6 +14,7 @@ $mysql_deps = <<SCRIPT
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    DEV_PASSWORD=hadoop
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    @@ -33,7 +34,7 @@ $mysql_deps = <<SCRIPT
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password 'hadoop' \
    && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    fi
    @@ -112,12 +113,16 @@ $system_config = <<SCRIPT
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    PROXY_CONFIG=/etc/profile.d/proxy.sh
    service iptables stop && chkconfig iptables off
    [ ! -e /etc/profile.d/proxy.sh ] \
    && echo "export http_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh \
    && echo "export https_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh
    if grep ryanair /etc/resolv.conf; then
    echo "export http_proxy=http://internalproxy.corp.ryanair.com:3128" > ${PROXY_CONFIG} \
    && echo "export https_proxy=http://internalproxy.corp.ryanair.com:3128" >> ${PROXY_CONFIG}
    else
    rm -fR ${PROXY_CONFIG}
    fi
    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
  16. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 93 additions and 8 deletions.
    101 changes: 93 additions & 8 deletions Vagrantfile
    @@ -3,19 +3,96 @@ $anaconda_deps = <<SCRIPT
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh
    if [ ! -d "/usr/local/anaconda" ]; then
    echo "Installing Anaconda..." \
    echo "Anaconda installation..." \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi
    SCRIPT

    $mysql_deps = <<SCRIPT
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] \
    && rpm -ivh https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}
    yum install -y mysql-community-server
    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
    echo "# InnoDB settings" >> ${MY_CNF}
    echo "innodb_file_per_table = 1" >> ${MY_CNF}
    echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
    echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
    echo "innodb_buffer_pool_size = 1G" >> ${MY_CNF}
    echo "innodb_thread_concurrency = 8" >> ${MY_CNF}
    echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
    echo "innodb_log_file_size = 512M" >> ${MY_CNF}
    echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}
    chkconfig mysqld on \
    && service mysqld start \
    && /usr/bin/mysqladmin -u root password 'hadoop' \
    && echo "# vagrant provisioned" >> ${MY_CNF}
    fi
    SCRIPT

    $spark_deps = <<SCRIPT
    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    SPARK_LINK=/opt/spark
    [ ! -e ${SPARK_LINK} ] \
    && echo "Spark installation..." \
    && wget http://ftp.heanet.ie/mirrors/www.apache.org/dist/spark/spark-1.5.1/${SPARK_TGZ} -q -P /opt/ \
    && tar zxf /opt/${SPARK_TGZ} -C /opt/ \
    && ln -s /opt/spark-1.5.1-bin-without-hadoop ${SPARK_LINK}
    [ ! -e /opt/${SPARK_TGZ} ] && exit 1
    echo "export PATH=\$PATH:${SPARK_LINK}/bin" > /etc/init.d/spark.sh
    echo "HADOOP_CONF_DIR=/etc/hadoop/conf/" > /opt/spark/conf/spark-env.sh
    echo "SPARK_DIST_CLASSPATH=$(hadoop classpath)" >> /opt/spark/conf/spark-env.sh
    echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/" >> ${SPARK_LINK}/conf/spark-env.sh
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-hadoop2.6.0.jar
    spark.shuffle.service.enabled true
    # Execution Behavior
    spark.broadcast.blockSize 4096
    # Dynamic Resource Allocation (YARN)
    spark.dynamicAllocation.enabled true
    spark.speculation true
    spark.scheduler.mode FAIR
    spark.kryoserializer.buffer.max 1000m
    spark.driver.maxResultSize 0
    spark.serializer org.apache.spark.serializer.KryoSerializer
    spark.yarn.preserve.staging.files false
    spark.master yarn
    spark.rdd.compress true
    # Local execution of selected Spark functions
    spark.localExecution.enabled true
    spark.sql.parquet.binaryAsString true
    spark.sql.parquet.compression.codec snappy
    # use lz4 compression for broadcast variables as Snappy is not supported on MacOSX
    spark.broadcast.compress true
    spark.io.compression.codec lz4
    spark.driver.extraLibraryPath /opt/cloudera/parcels/CDH/lib/hadoop/lib/native
    spark.executor.extraLibraryPath /opt/cloudera/parcels/CDH/lib/hadoop/lib/native
    spark.executor.extraJavaOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseCompressedOops
    spark.driver.extraJavaOptions -XX:+UseCompressedOops -XX:MaxPermSize=1g
    # JVM classpath wildcards must be a bare "*"; a "*.jar" glob is taken literally
    spark.executor.extraClassPath /usr/local/lib/jdbc/sqlserver/*:/usr/local/lib/jdbc/mysql/*:/usr/local/anaconda/bin:/opt/udfs/hive/*
    spark.driver.extraClassPath /usr/local/lib/jdbc/sqlserver/*:/usr/local/lib/jdbc/mysql/*:/usr/local/anaconda/bin:/opt/udfs/hive/*
    SPCNF
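    # Smoke test (sketch): "${SPARK_LINK}/bin/spark-shell --master local[2]" should start
    # a REPL without touching YARN, overriding the spark.master setting above.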
    echo "Add hive-site.xml configuration here !!!"
    SCRIPT

    $cloudera_deps = <<SCRIPT
    @@ -32,10 +109,16 @@ $cloudera_deps = <<SCRIPT
    SCRIPT

    $system_config = <<SCRIPT
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    service iptables stop && chkconfig iptables off
    [ ! -e /etc/profile.d/proxy.sh ] \
    && echo "export http_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh \
    && echo "export https_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh
    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
    echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
    @@ -47,19 +130,21 @@ SCRIPT
    Vagrant.configure(2) do |config|

    config.vm.box = "boxcutter/centos66"
    config.vm.hostname = "cdh.home.com"
    config.vm.network :public_network, bridge: "en0: Wi-Fi (AirPort)"
    config.vm.hostname = "cdh.instance.com"
    config.vm.network :public_network, :bridge => "en3: Thunderbolt Ethernet", :mac => "0800DEADBEEF"

    config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"]
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
    end
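    # Vagrant runs provisioners in the order they are declared below.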

    config.vm.provision :shell, :name => "system_config", :inline => $system_config
    config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
    config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
    config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
    config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
    config.vm.provision :shell, :name => "system_config", :inline => $system_config

    end
  17. bartekdobija revised this gist Oct 19, 2015. 1 changed file with 11 additions and 3 deletions.
    14 changes: 11 additions & 3 deletions Vagrantfile
    @@ -10,14 +10,21 @@ $anaconda_deps = <<SCRIPT
    SCRIPT

    $mysql_deps = <<SCRIPT
    [ ! -e /etc/yum.repos.d/mysql-community.repo ] \
    && rpm -ivh https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    yum install -y mysql-community-server
    SCRIPT

    $cloudera_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \
    && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/ \
    && yum clean all
    && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/
    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    @@ -44,13 +51,14 @@ Vagrant.configure(2) do |config|
    config.vm.network :public_network, bridge: "en0: Wi-Fi (AirPort)"

    config.vm.provider "virtualbox" do |vb|
    vb.name = "vagrant-cdh"
    vb.name = "cloudera-hadoop"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
    end

    config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
    config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
    config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
    config.vm.provision :shell, :name => "system_config", :inline => $system_config

  18. bartekdobija revised this gist Oct 18, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion Vagrantfile
    @@ -20,7 +20,7 @@ $cloudera_deps = <<SCRIPT
    && yum clean all
    # Cloudera Hadoop installation
    yum install -y hadoop zookeeper oozie sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    SCRIPT

  19. bartekdobija revised this gist Oct 18, 2015. 1 changed file with 22 additions and 10 deletions.
    32 changes: 22 additions & 10 deletions Vagrantfile
    @@ -1,15 +1,18 @@
    $dependency_installation = <<SCRIPT
    $anaconda_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    # Anaconda installation
    [ ! -d "/usr/local/anaconda" ] \
    && echo "Installing Anaconda..." \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    if [ ! -d "/usr/local/anaconda" ]; then
    echo "Installing Anaconda..." \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi
    SCRIPT

    $cloudera_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \
    @@ -19,6 +22,13 @@ $dependency_installation = <<SCRIPT
    # Cloudera Hadoop installation
    yum install -y hadoop zookeeper oozie sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    SCRIPT

    $system_config = <<SCRIPT
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
    echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
    @@ -40,6 +50,8 @@ Vagrant.configure(2) do |config|
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
    end

    config.vm.provision :shell, :name => "dep_installer", :inline => $dependency_installation
    config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
    config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
    config.vm.provision :shell, :name => "system_config", :inline => $system_config

    end
  20. bartekdobija created this gist Oct 18, 2015.
    45 changes: 45 additions & 0 deletions Vagrantfile
    @@ -0,0 +1,45 @@
    $dependency_installation = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    # Anaconda installation
    [ ! -d "/usr/local/anaconda" ] \
    && echo "Installing Anaconda..." \
    && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
    && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \
    && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/ \
    && yum clean all
    # Cloudera Hadoop installation
    yum install -y hadoop zookeeper oozie sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*
    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
    echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
    && echo "${DEV_USER} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/hadoop_oozie
    fi
    SCRIPT

    Vagrant.configure(2) do |config|

    config.vm.box = "boxcutter/centos66"
    config.vm.hostname = "cdh.home.com"
    config.vm.network :public_network, bridge: "en0: Wi-Fi (AirPort)"

    config.vm.provider "virtualbox" do |vb|
    vb.name = "vagrant-cdh"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
    end

    config.vm.provision :shell, :name => "dep_installer", :inline => $dependency_installation

    end