Last active: October 30, 2015 14:52
Revisions
bartekdobija revised this gist
Oct 30, 2015 · 1 changed file with 44 additions and 14 deletions.
@@ -153,7 +153,7 @@ $cloudera_deps = <<SCRIPT

    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop \
      hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode \
      hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc \
      hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
      hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \

@@ -165,19 +165,19 @@ $cloudera_deps = <<SCRIPT

    <configuration>
      <property>
        <name>mapred.job.tracker</name>
        <value>cdh.instance.com:8021</value>
      </property>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>cdh.instance.com:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>cdh.instance.com:19888</value>
      </property>
      <property>
        <name>mapreduce.task.tmp.dir</name>

@@ -191,6 +191,10 @@ $cloudera_deps = <<SCRIPT

        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
      </property>
      <property>
        <name>yarn.app.mapreduce.am.staging-dir</name>
        <value>/user</value>
      </property>
    </configuration>
    HDPCNF

@@ -292,6 +296,10 @@ YRNCNF

        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.acls.enabled</name>
        <value>true</value>
      </property>
    </configuration>
    HDFSCNF

@@ -373,16 +381,20 @@ HIVECNF

      service hadoop-yarn-nodemanager start
    fi

    echo "Creating HDFS directory structure" \
      && sudo -u hdfs hdfs dfs -mkdir -p {/user/{hadoop_oozie,spark,hive/warehouse,oozie/share/lib},/tmp,/jobs,/var/log/hadoop-yarn,/user/history} \
      && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
      && sudo -u hdfs hdfs dfs -chown -R mapred:hadoop /user/history \
      && sudo -u hdfs hdfs dfs -chmod -R 1777 /user/history \
      && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie \
      && sudo -u hdfs hdfs dfs -chown -R hadoop_oozie:hadoop_oozie /user/hadoop_oozie \
      && sudo -u hdfs hdfs dfs -chown -R yarn:mapred /var/log/hadoop-yarn \
      && sudo -u hdfs hdfs dfs -chmod -R 1777 /

    # history server must start after hdfs privileges have been fixed
    if [ ! "$(ps aux | grep historyserver | wc -l)" == "2" ]; then
      service hadoop-mapreduce-historyserver start
    fi

    # start Hive processes
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then

@@ -450,10 +462,20 @@ HIVECNF

        <name>oozie.use.system.libpath</name>
        <value>true</value>
      </property>
      <property>
        <name>oozie.credentials.credentialclasses</name>
        <value>
          hcat=com.github.bartekdobija.oozieutils.creds.TestCreds,
          hive=com.github.bartekdobija.oozieutils.creds.TestCreds,
          hbase=com.github.bartekdobija.oozieutils.creds.TestCreds
        </value>
      </property>
    </configuration>
    OOZCNF

    OOZIE_UTILS=https://github.com/bartekdobija/oozie-utils/releases/download/0.7/oozieutils-0.7.jar

    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then

@@ -467,7 +489,9 @@ OOZCNF

        && chown -R oozie:oozie /var/log/oozie \
        && sudo -u oozie /usr/lib/oozie/bin/oozie-setup.sh sharelib create \
           -fs hdfs://localhost/user/oozie/share/lib/ \
           -locallib /usr/lib/oozie/oozie-sharelib \
        && rm -fR /usr/lib/oozie/libserver/oozieutils* \
        && wget ${OOZIE_UTILS} -q -P /usr/lib/oozie/libserver/
    fi

    echo "registering Spark configuration in Oozie" \

@@ -546,20 +570,26 @@ $couchbase_deps = <<SCRIPT

      && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
         --bucket=user_profile_versions \
         --bucket-type=couchbase \
         --bucket-ramsize=100 \
         --bucket-replica=1 \
         --bucket-priority=high \
         --bucket-password=couchbase \
         -u couchbase \
         -p couchbase \
      && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
         --bucket=user_profile \
         --bucket-type=couchbase \
         --bucket-ramsize=100 \
         --bucket-replica=1 \
         --bucket-priority=high \
         --bucket-password=couchbase \
         -u couchbase \
         -p couchbase

      echo "couchbase" > /tmp/.couchbaseCreds \
        && sudo -u hadoop_oozie hdfs dfs -copyFromLocal -f /tmp/.couchbaseCreds /user/hadoop_oozie/
    fi
SCRIPT
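A quick way to exercise this revision's Oozie and HDFS changes from inside the guest is sketched below. The paths and the localhost URL come from the configuration above; it assumes the oozie client installed by the CDH packages is on the PATH.

    # Check the HDFS layout created by the provisioner.
    sudo -u hdfs hdfs dfs -ls /user
    # Check that Oozie is up and that the sharelib (including the oozieutils jar) is visible.
    oozie admin -oozie http://localhost:11000/oozie -status
    oozie admin -oozie http://localhost:11000/oozie -shareliblist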
bartekdobija revised this gist
Oct 29, 2015 · 1 changed file with 175 additions and 54 deletions.
@@ -13,49 +13,17 @@ $anaconda_deps = <<SCRIPT

SCRIPT

# Spark dependencies
$spark_deps = <<SCRIPT

    SPARK_VER=spark-1.5.0-bin-without-hadoop
    SPARK_BIN=http://stash.ryanair.com:7990/projects/BI/repos/spark-cdh-vagrant/browse/spark/${SPARK_VER}.tgz?raw
    SPARK_OUT=${SPARK_VER}.tgz
    SPARK_LINK=/opt/spark

    [ ! -e ${SPARK_LINK} ] \
      && echo "Spark installation..." \
      && wget ${SPARK_BIN} -q -O /tmp/${SPARK_OUT} \
      && tar zxf /vagrant/spark/${SPARK_OUT} -C /opt/ \
      && ln -s /opt/${SPARK_VER} ${SPARK_LINK}

    [ ! -e ${SPARK_LINK} ] && echo "Spark installation has failed!" && exit 1

@@ -139,6 +107,41 @@ HIVECNF

SCRIPT

# MySQL dependencies
$mysql_deps = <<SCRIPT

    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    DEV_PASSWORD=hadoop

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

    yum install -y mysql-community-server

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
      echo "# InnoDB settings" >> ${MY_CNF}
      echo "default_storage_engine = innodb" >> ${MY_CNF}
      echo "innodb_file_per_table = 1" >> ${MY_CNF}
      echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
      echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
      echo "innodb_buffer_pool_size = 1G" >> ${MY_CNF}
      echo "innodb_thread_concurrency = 8" >> ${MY_CNF}
      echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
      echo "innodb_log_file_size = 512M" >> ${MY_CNF}
      echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}

      chkconfig mysqld on \
        && service mysqld start \
        && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
        && echo "# vagrant provisioned" >> ${MY_CNF}

      mysql -u root -p${DEV_PASSWORD} \
        -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'" \
        && mysql -u root -p${DEV_PASSWORD} \
        -e "create schema if not exists oozie; grant all on oozie.* to 'oozie'@'localhost' identified by 'oozie'"
    fi

SCRIPT

# Cloudera CDH dependencies
$cloudera_deps = <<SCRIPT

@@ -155,7 +158,7 @@ $cloudera_deps = <<SCRIPT

      hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
      hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \
      oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
      hive-jdbc avro-libs pig kite impala* openssl-devel openssl

    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml

@@ -238,6 +241,61 @@ HDPCNF

    YRNCNF

    cat << HDFSCNF > /etc/hadoop/conf/hdfs-site.xml
    <configuration>
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <property>
        <name>dfs.safemode.extension</name>
        <value>0</value>
      </property>
      <property>
        <name>dfs.safemode.min.datanodes</name>
        <value>1</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/var/lib/hadoop-hdfs/cache/\\${user.name}</value>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/name</value>
      </property>
      <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/namesecondary</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///var/lib/hadoop-hdfs/cache/\\${user.name}/dfs/data</value>
      </property>
      <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.client.file-block-storage-locations.timeout.millis</name>
        <value>10000</value>
      </property>
      <property>
        <name>dfs.domain.socket.path</name>
        <value>/var/run/hadoop-hdfs/dn._PORT</value>
      </property>
      <property>
        <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
    </configuration>
    HDFSCNF

    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
      echo "Formatting HDFS..." \

@@ -293,6 +351,7 @@ HIVECNF

      && chkconfig hadoop-hdfs-datanode on \
      && chkconfig hadoop-yarn-resourcemanager on \
      && chkconfig hadoop-yarn-nodemanager on \
      && chkconfig hadoop-mapreduce-historyserver on \
      && chkconfig hive-metastore on \
      && chkconfig hive-server2 on \
      && chkconfig oozie on

@@ -314,18 +373,16 @@ HIVECNF

      service hadoop-yarn-nodemanager start
    fi

    if [ ! "$(ps aux | grep historyserver | wc -l)" == "2" ]; then
      service hadoop-mapreduce-historyserver start
    fi

    echo "Creating HDFS directory structure" \
      && sudo -u hdfs hdfs dfs -mkdir -p {/user/{spark,hive/warehouse,oozie/share/lib},/tmp,/jobs,/var/log/hadoop-yarn} \
      && sudo -u hdfs hdfs dfs -chmod -R 777 / \
      && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
      && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie \
      && sudo -u hdfs hdfs dfs -chown -R yarn:yarn /var/log/hadoop-yarn

    # start Hive processes
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then

@@ -381,10 +438,6 @@ HIVECNF

        <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>use.system.libpath.for.mapreduce.and.pig.jobs</name>
        <value>true</value>

@@ -393,16 +446,28 @@ HIVECNF

        <name>oozie.service.PurgeService.purge.old.coord.action</name>
        <value>true</value>
      </property>
      <property>
        <name>oozie.use.system.libpath</name>
        <value>true</value>
      </property>
    </configuration>
    OOZCNF

    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then

      mkdir -p /user/oozie/share/lib \
        && chown -R oozie:oozie /user/oozie \
        && rm -fR /etc/oozie/conf/hadoop-conf \
        && ln -s /etc/hadoop/conf /etc/oozie/conf/hadoop-conf

      echo "Creating Oozie database" \
        && /usr/lib/oozie/bin/ooziedb.sh create -run \
        && chown -R oozie:oozie /var/log/oozie \
        && sudo -u oozie /usr/lib/oozie/bin/oozie-setup.sh sharelib create \
           -fs hdfs://localhost/user/oozie/share/lib/ \
           -locallib /usr/lib/oozie/oozie-sharelib
    fi

    echo "registering Spark configuration in Oozie" \

@@ -412,11 +477,25 @@ OOZCNF

      service oozie start
    fi

    echo "export OOZIE_URL=http://localhost:11000/oozie" > /etc/profile.d/oozie.sh

SCRIPT

# OS configuration
$system_config = <<SCRIPT

    # disable IPv6
    if [ "$(grep disable_ipv6 /etc/sysctl.conf | wc -l)" == "0" ]; then
      echo "net.ipv6.conf.all.disable_ipv6=1" >> /etc/sysctl.conf \
        && sysctl -f /etc/sysctl.conf
    fi

    # this should be a persistent config
    ulimit -n 65536
    ulimit -s 10240
    ulimit -c unlimited

    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    PROXY_CONFIG=/etc/profile.d/proxy.sh

@@ -442,6 +521,47 @@ $system_config = <<SCRIPT

      && echo "${DEV_USER} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/hadoop_oozie
    fi

    if [ "$(grep vm.swappiness /etc/sysctl.conf | wc -l)" == "0" ]; then
      echo "vm.swappiness=0" >> /etc/sysctl.conf && sysctl vm.swappiness=0
    fi

SCRIPT

# Couchbase dependencies
$couchbase_deps = <<SCRIPT

    COUCHBASE_VER=couchbase-server-community-3.0.1-centos6.x86_64.rpm
    COUCHBASE_LINK=/opt/couchbase

    if [ ! -e ${COUCHBASE_LINK} ]; then
      wget http://packages.couchbase.com/releases/3.0.1/${COUCHBASE_VER} -q -P /tmp/ \
        && rpm -iv /tmp/${COUCHBASE_VER} \
        && chkconfig couchbase-server on \
        && sleep 20 \
        && /opt/couchbase/bin/couchbase-cli cluster-init \
           -c localhost \
           -u couchbase \
           -p couchbase \
           --cluster-ramsize=350 \
        && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
           --bucket=user_profile_versions \
           --bucket-type=couchbase \
           --bucket-ramsize=150 \
           --bucket-replica=1 \
           --bucket-priority=high \
           -u couchbase \
           -p couchbase \
        && /opt/couchbase/bin/couchbase-cli bucket-create -c localhost \
           --bucket=user_profile \
           --bucket-type=couchbase \
           --bucket-ramsize=150 \
           --bucket-replica=1 \
           --bucket-priority=high \
           -u couchbase \
           -p couchbase
    fi

SCRIPT

$information = <<SCRIPT

@@ -463,7 +583,7 @@ Vagrant.configure(2) do |config|

  config.vm.network :public_network, :mac => "0800DEADBEEF"

  config.vm.provider "virtualbox" do |vb|
    vb.name = "dev-hadoop-env"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"]

@@ -475,6 +595,7 @@ Vagrant.configure(2) do |config|

  config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
  config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
  config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
  config.vm.provision :shell, :name => "couchbase_deps", :inline => $couchbase_deps
  config.vm.provision :shell, :name => "information", :inline => $information

end
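To confirm the Couchbase provisioning in this revision took effect, the buckets it creates can be listed with the same CLI; a minimal sketch, assuming the couchbase/couchbase credentials set by cluster-init above:

    # Expect user_profile and user_profile_versions in the output.
    /opt/couchbase/bin/couchbase-cli bucket-list -c localhost -u couchbase -p couchbase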
bartekdobija revised this gist
Oct 20, 2015 · 1 changed file with 165 additions and 86 deletions.
@@ -22,7 +22,7 @@ $mysql_deps = <<SCRIPT

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

    yum install -y mysql-community-server

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
      echo "# InnoDB settings" >> ${MY_CNF}

@@ -39,25 +39,26 @@ $mysql_deps = <<SCRIPT

      && service mysqld start \
      && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
      && echo "# vagrant provisioned" >> ${MY_CNF}

      mysql -u root -p${DEV_PASSWORD} \
        -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'" \
        && mysql -u root -p${DEV_PASSWORD} \
        -e "create schema if not exists oozie; grant all on oozie.* to 'oozie'@'localhost' identified by 'oozie'"
    fi

SCRIPT

# Spark dependencies
$spark_deps = <<SCRIPT

    SPARK_VER=spark-1.5.0-bin-without-hadoop
    SPARK_LINK=/opt/spark

    [ ! -e ${SPARK_LINK} ] \
      && echo "Spark installation..." \
      && tar zxf /vagrant/spark/${SPARK_VER}.tgz -C /opt/ \
      && ln -s /opt/${SPARK_VER} ${SPARK_LINK}

    [ ! -e ${SPARK_LINK} ] && echo "Spark installation has failed!" && exit 1

    echo "Spark configuration..."

    echo "configuring /etc/profile.d/spark.sh"

@@ -159,85 +160,80 @@ $cloudera_deps = <<SCRIPT

    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml
    <configuration>
      <property>
        <name>mapred.job.tracker</name>
        <value>localhost:8021</value>
      </property>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>localhost:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>localhost:19888</value>
      </property>
      <property>
        <name>mapreduce.task.tmp.dir</name>
        <value>/var/lib/hadoop-mapreduce/cache/\\${user.name}/tasks</value>
      </property>
      <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
      </property>
      <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
      </property>
    </configuration>
    HDPCNF

    cat << YRNCNF > /etc/hadoop/conf/yarn-site.xml
    <configuration>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle,spark_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
        <value>org.apache.spark.network.yarn.YarnShuffleService</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.dispatcher.exit-on-error</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/var/lib/hadoop-yarn/cache/\\${user.name}/nm-local-dir</value>
      </property>
      <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/var/log/hadoop-yarn/containers</value>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/var/log/hadoop-yarn/apps</value>
      </property>
      <property>
        <name>yarn.application.classpath</name>
        <value>\\$HADOOP_CONF_DIR,\\$HADOOP_COMMON_HOME/*,\\$HADOOP_COMMON_HOME/lib/*,\\$HADOOP_HDFS_HOME/*,
          \\$HADOOP_HDFS_HOME/lib/*,\\$HADOOP_MAPRED_HOME/*,\\$HADOOP_MAPRED_HOME/lib/*,\\$HADOOP_YARN_HOME/*,
          \\$HADOOP_YARN_HOME/lib/*
        </value>
      </property>
    </configuration>
    YRNCNF

@@ -256,8 +252,10 @@ YRNCNF

      && wget ${MYSQL_JDBC_SOURCE} -q -P /tmp/ \
      && echo "Installing MySQL JDBC drivers" \
      && tar zxf /tmp/${MYSQL_JDBC}.tar.gz -C /tmp/ \
      && mkdir -p /usr/lib/oozie/libext \
      && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/hive/lib/ \
      && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/local/lib/jdbc/mysql/ \
      && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/oozie/libext/

    cat << HIVECNF > /etc/hive/conf/hive-site.xml

@@ -299,7 +297,7 @@ HIVECNF

      && chkconfig hive-server2 on \
      && chkconfig oozie on

    # start Hadoop processes
    if [ ! "$(ps aux | grep hdfs-namenode | wc -l)" == "2" ]; then
      service hadoop-hdfs-namenode start
    fi

@@ -324,9 +322,12 @@ HIVECNF

      && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp \
      && sudo -u hdfs hdfs dfs -mkdir -p /user/hive/warehouse \
      && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive \
      && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse \
      && sudo -u hdfs hdfs dfs -mkdir -p /user/oozie/share/lib \
      && sudo -u hdfs hdfs dfs -chown -R oozie:oozie /user/oozie

    # start Hive processes
    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
      service hive-metastore start
    fi

@@ -335,6 +336,82 @@ HIVECNF

      service hive-server2 start
    fi

    # Oozie configuration
    echo "Deploying oozie-site.xml"
    cat << OOZCNF > /etc/oozie/conf/oozie-site.xml
    <configuration>
      <property>
        <name>oozie.service.JPAService.create.db.schema</name>
        <value>true</value>
      </property>
      <property>
        <name>oozie.service.JPAService.validate.db.connection</name>
        <value>true</value>
      </property>
      <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:mysql://localhost:3306/oozie?createDatabaseIfNotExist=true</value>
      </property>
      <property>
        <name>oozie.service.JPAService.jdbc.username</name>
        <value>oozie</value>
      </property>
      <property>
        <name>oozie.service.JPAService.jdbc.password</name>
        <value>oozie</value>
      </property>
      <property>
        <name>oozie.service.ProxyUserService.proxyuser.oozie.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>oozie.service.ProxyUserService.proxyuser.oozie.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>oozie.service.ProxyUserService.proxyuser.hue.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>oozie.service.ProxyUserService.proxyuser.hue.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>oozie.service.WorkflowAppService.system.libpath</name>
        <value>/usr/lib/oozie/oozie-sharelib</value>
      </property>
      <property>
        <name>use.system.libpath.for.mapreduce.and.pig.jobs</name>
        <value>true</value>
      </property>
      <property>
        <name>oozie.service.PurgeService.purge.old.coord.action</name>
        <value>true</value>
      </property>
    </configuration>
    OOZCNF

    # create an Oozie database if not exists and upload sharelib
    if [ ! -f /var/lib/mysql/oozie/WF_JOBS.frm ]; then
      echo "Creating Oozie database" \
        && /usr/lib/oozie/bin/ooziedb.sh create -run \
        && mkdir -p /opt/sharelib \
        && /usr/lib/oozie/bin/oozie-setup.sh sharelib create -fs /opt/sharelib -locallib /usr/lib/oozie/oozie-sharelib
    fi

    echo "registering Spark configuration in Oozie" \
      && ln -f -s /opt/spark/conf /etc/oozie/conf/spark-conf

    if [ ! "$(ps aux | grep oozie | wc -l)" == "2" ]; then
      service oozie start
    fi

SCRIPT

# OS configuration

@@ -370,8 +447,10 @@ SCRIPT

$information = <<SCRIPT
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    echo "Guest IP address: $ip"
    echo "Namenode UI available at: http://$ip:50070"
    echo "Resource Manager UI available at: http://$ip:8088"
    echo "Oozie endpoint available at: http://$ip:11000/oozie"
    echo "Spark 1.5 available under /opt/spark"
    echo "MySQL root password: hadoop"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
bartekdobija revised this gist
Oct 19, 2015 · No changes.
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 2 additions and 2 deletions.
@@ -122,7 +122,7 @@ SPCNF

    </property>
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://cdh.instance.com:9083</value>
    </property>
    <property>
      <name>hive.metastore.warehouse.dir</name>

@@ -280,7 +280,7 @@ YRNCNF

    </property>
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://cdh.instance.com:9083</value>
    </property>
    <property>
      <name>hive.metastore.warehouse.dir</name>
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 186 additions and 16 deletions.
@@ -5,7 +5,7 @@ $anaconda_deps = <<SCRIPT

    if [ ! -d "/usr/local/anaconda" ]; then
      echo "Anaconda installation..." \
        && echo "downloading binaries" \
        && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
        && echo "running installer" \
        && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda

@@ -22,10 +22,9 @@ $mysql_deps = <<SCRIPT

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

    yum install -y mysql-community-server

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
      echo "# InnoDB settings" >> ${MY_CNF}
      echo "default_storage_engine = innodb" >> ${MY_CNF}
      echo "innodb_file_per_table = 1" >> ${MY_CNF}

@@ -36,12 +35,12 @@ $mysql_deps = <<SCRIPT

      echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
      echo "innodb_log_file_size = 512M" >> ${MY_CNF}
      echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}

      chkconfig mysqld on \
        && service mysqld start \
        && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
        && echo "# vagrant provisioned" >> ${MY_CNF}

      mysql -u root -p${DEV_PASSWORD} \
        -e "create schema if not exists hive; grant all on hive.* to 'hive'@'localhost' identified by 'hive'"
    fi

SCRIPT

@@ -51,9 +50,9 @@ $spark_deps = <<SCRIPT

    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    SPARK_LINK=/opt/spark

    [ ! -e ${SPARK_LINK} ] \
      && echo "Spark installation..." \
      && echo "downloading binaries" \
      && wget http://ftp.heanet.ie/mirrors/www.apache.org/dist/spark/spark-1.5.1/${SPARK_TGZ} -q -P /opt/ \
      && tar zxf /opt/${SPARK_TGZ} -C /opt/ \
      && ln -s /opt/spark-1.5.1-bin-without-hadoop ${SPARK_LINK}

@@ -66,16 +65,13 @@ $spark_deps = <<SCRIPT

    echo "configuring /opt/spark/conf/spark-env.sh"
    cat << SPCNF > /opt/spark/conf/spark-env.sh
    HADOOP_CONF_DIR=/etc/hadoop/conf/
    SPARK_DIST_CLASSPATH=\\$(hadoop classpath)
    LD_LIBRARY_PATH=\\${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/
    SPCNF

    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.shuffle.service.enabled true

    # Execution Behavior
    spark.broadcast.blockSize 4096

@@ -102,10 +98,43 @@ $spark_deps = <<SCRIPT

    spark.executor.extraJavaOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseCompressedOops
    spark.driver.extraJavaOptions -XX:+UseCompressedOops -XX:MaxPermSize=1g
    spark.executor.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    spark.driver.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    SPCNF

    echo "configuring ${SPARK_LINK}/conf/hive-site.xml"
    cat << HIVECNF > ${SPARK_LINK}/conf/hive-site.xml
    <configuration>
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>hive.metastore.uris</name>
        <value>thrift://localhost:9083</value>
      </property>
      <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>hdfs:///user/hive/warehouse</value>
      </property>
    </configuration>
    HIVECNF

    echo "installing resource scheduler" \
      && mkdir -p /usr/lib/hadoop-yarn/lib/ \
      && cp -f ${SPARK_LINK}/lib/spark-*-yarn-shuffle.jar /usr/lib/hadoop-yarn/lib/

SCRIPT

@@ -127,12 +156,140 @@ $cloudera_deps = <<SCRIPT

      oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
      hive-jdbc avro-libs pig kite impala*

    cat << HDPCNF > /etc/hadoop/conf/mapred-site.xml
    <configuration>
      <property>
        <name>mapred.job.tracker</name>
        <value>localhost:8021</value>
      </property>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>localhost:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>localhost:19888</value>
      </property>
      <property>
        <description>To set the value of tmp directory for map and reduce tasks.</description>
        <name>mapreduce.task.tmp.dir</name>
        <value>/var/lib/hadoop-mapreduce/cache/\\${user.name}/tasks</value>
      </property>
      <property>
        <name>mapreduce.map.memory.mb</name>
        <value>512</value>
      </property>
      <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>512</value>
      </property>
    </configuration>
    HDPCNF

    cat << YRNCNF > /etc/hadoop/conf/yarn-site.xml
    <configuration>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle,spark_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
        <value>org.apache.spark.network.yarn.YarnShuffleService</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.dispatcher.exit-on-error</name>
        <value>true</value>
      </property>
      <property>
        <description>List of directories to store localized files in.</description>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/var/lib/hadoop-yarn/cache/\\${user.name}/nm-local-dir</value>
      </property>
      <property>
        <description>Where to store container logs.</description>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/var/log/hadoop-yarn/containers</value>
      </property>
      <property>
        <description>Where to aggregate logs to.</description>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/var/log/hadoop-yarn/apps</value>
      </property>
      <property>
        <description>Classpath for typical applications.</description>
        <name>yarn.application.classpath</name>
        <value>\\$HADOOP_CONF_DIR,\\$HADOOP_COMMON_HOME/*,\\$HADOOP_COMMON_HOME/lib/*,\\$HADOOP_HDFS_HOME/*,
          \\$HADOOP_HDFS_HOME/lib/*,\\$HADOOP_MAPRED_HOME/*,\\$HADOOP_MAPRED_HOME/lib/*,\\$HADOOP_YARN_HOME/*,
          \\$HADOOP_YARN_HOME/lib/*
        </value>
      </property>
    </configuration>
    YRNCNF

    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
      echo "Formatting HDFS..." \
        && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi

    MYSQL_JDBC=mysql-connector-java-5.1.37
    MYSQL_JDBC_SOURCE=http://dev.mysql.com/get/Downloads/Connector-J/${MYSQL_JDBC}.tar.gz

    mkdir -p /usr/local/lib/jdbc/mysql \
      && echo "Downloading MySQL JDBC drivers" \
      && wget ${MYSQL_JDBC_SOURCE} -q -P /tmp/ \
      && echo "Installing MySQL JDBC drivers" \
      && tar zxf /tmp/${MYSQL_JDBC}.tar.gz -C /tmp/ \
      && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/lib/hive/lib/ \
      && cp /tmp/${MYSQL_JDBC}/mysql-connector-java*.jar /usr/local/lib/jdbc/mysql/

    cat << HIVECNF > /etc/hive/conf/hive-site.xml
    <configuration>
      <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>hive</value>
      </property>
      <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
      </property>
      <property>
        <name>hive.metastore.uris</name>
        <value>thrift://localhost:9083</value>
      </property>
      <property>
        <name>hive.metastore.warehouse.dir</name>
        <value>hdfs:///user/hive/warehouse</value>
      </property>
    </configuration>
    HIVECNF

    # auto-start services
    chkconfig hadoop-hdfs-namenode on \
      && chkconfig hadoop-hdfs-datanode on \

@@ -143,7 +300,7 @@ $cloudera_deps = <<SCRIPT

      && chkconfig oozie on

    # start hadoop processes
    if [ ! "$(ps aux | grep hdfs-namenode | wc -l)" == "2" ]; then
      service hadoop-hdfs-namenode start
    fi

@@ -162,8 +319,21 @@ $cloudera_deps = <<SCRIPT

    echo "Creating HDFS directory structure" \
      && sudo -u hdfs hdfs dfs -mkdir -p /user \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
      && sudo -u hdfs hdfs dfs -mkdir -p /user/spark \
      && sudo -u hdfs hdfs dfs -chmod -R 755 /user/spark \
      && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp \
      && sudo -u hdfs hdfs dfs -mkdir -p /user/hive/warehouse \
      && sudo -u hdfs hdfs dfs -chown -R hive:hive /user/hive/warehouse \
      && sudo -u hdfs hdfs dfs -chmod -R 755 /user/hive/warehouse

    if [ ! "$(ps aux | grep HiveMetaStore | wc -l)" == "2" ]; then
      service hive-metastore start
    fi

    if [ ! "$(ps aux | grep HiveServer2 | wc -l)" == "2" ]; then
      service hive-server2 start
    fi

SCRIPT

@@ -211,7 +381,7 @@ Vagrant.configure(2) do |config|

  config.vm.box = "boxcutter/centos66"
  config.vm.hostname = "cdh.instance.com"
  config.vm.network :public_network, :mac => "0800DEADBEEF"

  config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
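After this revision provisions, the Spark-on-YARN wiring could be smoke-tested roughly as follows. This is a sketch: the examples-jar path under /opt/spark/lib is an assumption about the Spark 1.5.x tarball layout, not something the gist states.

    # Submit the bundled Pi example to YARN using the spark-defaults.conf written above.
    /opt/spark/bin/spark-submit \
      --master yarn \
      --class org.apache.spark.examples.SparkPi \
      /opt/spark/lib/spark-examples-*.jar 10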
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 1 addition and 1 deletion.
@@ -22,7 +22,7 @@ $mysql_deps = <<SCRIPT

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

    yum install -y mysql-community-server mysql-connector-java

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 1 addition and 0 deletions.
@@ -27,6 +27,7 @@ $mysql_deps = <<SCRIPT

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
      echo "# InnoDB settings" >> ${MY_CNF}
      echo "default_storage_engine = innodb" >> ${MY_CNF}
      echo "innodb_file_per_table = 1" >> ${MY_CNF}
      echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
      echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 2 additions and 1 deletion.
@@ -108,7 +108,7 @@ SPCNF

SCRIPT

# Cloudera CDH dependencies
$cloudera_deps = <<SCRIPT

    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo

@@ -201,6 +201,7 @@ $information = <<SCRIPT

    echo "Guest IP address: $ip"
    echo "Namenode's UI available at: http://$ip:50070"
    echo "Resource Manager's UI available at: http://$ip:8088"
    echo "MySQL root password: hadoop"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
SCRIPT
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 22 additions and 4 deletions.
@@ -38,7 +38,7 @@ $mysql_deps = <<SCRIPT

    chkconfig mysqld on \
      && service mysqld start \
      && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" &> /dev/null \
      && echo "# vagrant provisioned" >> ${MY_CNF}

    fi

@@ -118,11 +118,18 @@ $cloudera_deps = <<SCRIPT

      && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/

    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop \
      hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode \
      hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc \
      hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager \
      hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server \
      oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog \
      hive-jdbc avro-libs pig kite impala*

    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
      echo "Formatting HDFS..." \
        && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi

    # auto-start services

@@ -151,7 +158,8 @@ $cloudera_deps = <<SCRIPT

      service hadoop-yarn-nodemanager start
    fi

    echo "Creating HDFS directory structure" \
      && sudo -u hdfs hdfs dfs -mkdir -p /user \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
      && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp

@@ -188,6 +196,15 @@ $system_config = <<SCRIPT

SCRIPT

$information = <<SCRIPT
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    echo "Guest IP address: $ip"
    echo "Namenode's UI available at: http://$ip:50070"
    echo "Resource Manager's UI available at: http://$ip:8088"
    echo "You may want to add the below line to /etc/hosts:"
    echo "$ip cdh.instance.com"
SCRIPT

Vagrant.configure(2) do |config|

  config.vm.box = "boxcutter/centos66"

@@ -207,5 +224,6 @@ Vagrant.configure(2) do |config|

  config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
  config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
  config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
  config.vm.provision :shell, :name => "information", :inline => $information

end
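With the namenode formatted and the daemons started by this script, basic HDFS health can be verified with the standard admin tooling; a sketch:

    # One live datanode should be reported, and safe mode should be off.
    sudo -u hdfs hdfs dfsadmin -report
    sudo -u hdfs hdfs dfsadmin -safemode get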
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 5 additions and 1 deletion.
@@ -75,7 +75,6 @@ SPCNF

    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.shuffle.service.enabled true

    # Execution Behavior
    spark.broadcast.blockSize 4096

@@ -152,6 +151,11 @@ $cloudera_deps = <<SCRIPT

      service hadoop-yarn-nodemanager start
    fi

    sudo -u hdfs hdfs dfs -mkdir -p /user \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /user \
      && sudo -u hdfs hdfs dfs -mkdir -p /tmp \
      && sudo -u hdfs hdfs dfs -chmod -R 777 /tmp

SCRIPT

# OS configuration
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 36 additions and 0 deletions.
@@ -1,3 +1,4 @@

# Anaconda dependencies
$anaconda_deps = <<SCRIPT
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh

@@ -12,6 +13,7 @@ $anaconda_deps = <<SCRIPT

SCRIPT

# MySQL dependencies
$mysql_deps = <<SCRIPT
    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm

@@ -43,6 +45,7 @@ $mysql_deps = <<SCRIPT

SCRIPT

# Spark dependencies
$spark_deps = <<SCRIPT
    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz

@@ -106,6 +109,7 @@ SPCNF

SCRIPT

# Cloduera CDH dependencies
$cloudera_deps = <<SCRIPT
    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo

@@ -117,8 +121,40 @@ $cloudera_deps = <<SCRIPT

    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*

    # format namenode
    if [ ! -e /var/lib/hadoop-hdfs/cache/hdfs ]; then
      echo "Formatting HDFS..." && sudo -u hdfs hdfs namenode -format -force &> /dev/null
    fi

    # auto-start services
    chkconfig hadoop-hdfs-namenode on \
      && chkconfig hadoop-hdfs-datanode on \
      && chkconfig hadoop-yarn-resourcemanager on \
      && chkconfig hadoop-yarn-nodemanager on \
      && chkconfig hive-metastore on \
      && chkconfig hive-server2 on \
      && chkconfig oozie on

    # start hadoop processes
    if [ ! "$(ps aux | grep namenode | wc -l)" == "2" ]; then
      service hadoop-hdfs-namenode start
    fi

    if [ ! "$(ps aux | grep datanode | wc -l)" == "2" ]; then
      service hadoop-hdfs-datanode start
    fi

    if [ ! "$(ps aux | grep resourcemanager | wc -l)" == "2" ]; then
      service hadoop-yarn-resourcemanager start
    fi

    if [ ! "$(ps aux | grep nodemanager | wc -l)" == "2" ]; then
      service hadoop-yarn-nodemanager start
    fi

SCRIPT

# OS configuration
$system_config = <<SCRIPT
    DEV_USER=hadoop_oozie
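The ps/wc guards above count processes by name; an alternative sketch that asks the same CDH init scripts for their state directly:

    # Report the state of each Hadoop service the provisioner manages.
    for svc in hadoop-hdfs-namenode hadoop-hdfs-datanode \
               hadoop-yarn-resourcemanager hadoop-yarn-nodemanager; do
      service "$svc" status
    done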
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 13 additions and 3 deletions.
@@ -4,7 +4,9 @@ $anaconda_deps = <<SCRIPT

    if [ ! -d "/usr/local/anaconda" ]; then
      echo "Anaconda installation..." \
        && echo "getting binaries" \
        && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
        && echo "running installer" \
        && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi

@@ -54,12 +56,20 @@ $spark_deps = <<SCRIPT

    [ ! -e /opt/${SPARK_TGZ} ] && exit 1

    echo "Spark configuration..."

    echo "configuring /etc/profile.d/spark.sh"
    echo 'export PATH=$PATH'":${SPARK_LINK}/bin" > /etc/profile.d/spark.sh

    echo "configuring /opt/spark/conf/spark-env.sh"
    cat << SPCNF > /opt/spark/conf/spark-env.sh
    HADOOP_CONF_DIR=/etc/hadoop/conf/
    SPARK_DIST_CLASSPATH=\\$(hadoop classpath)
    LD_LIBRARY_PATH=\\${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/
    SPCNF

    echo "configuring ${SPARK_LINK}/conf/spark-defaults.conf"
    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-hadoop2.6.0.jar
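Once this revision has written the Spark configuration, a quick sanity check is to inspect the generated files and let Spark load them; a sketch:

    # The heredocs above should have produced these files.
    cat /opt/spark/conf/spark-env.sh
    cat /opt/spark/conf/spark-defaults.conf
    # Any Spark command sources spark-env.sh; the banner confirms the install.
    /opt/spark/bin/spark-submit --version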
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 7 additions and 1 deletion.
@@ -54,7 +54,7 @@ $spark_deps = <<SCRIPT

    [ ! -e /opt/${SPARK_TGZ} ] && exit 1

    echo 'export PATH=$PATH'":${SPARK_LINK}/bin" > /etc/profile.d/spark.sh

    echo "HADOOP_CONF_DIR=/etc/hadoop/conf/" > /opt/spark/conf/spark-env.sh
    echo "SPARK_DIST_CLASSPATH=$(hadoop classpath)" >> /opt/spark/conf/spark-env.sh

@@ -124,6 +124,12 @@ $system_config = <<SCRIPT

      rm -fR ${PROXY_CONFIG}
    fi

    # Add entries to /etc/hosts
    ip=$(ifconfig eth1 | awk -v host=$(hostname) '/inet addr/ {print substr($2,6)}')
    host=$(hostname)
    echo "127.0.0.1 localhost" > /etc/hosts
    echo "$ip $host" >> /etc/hosts

    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
      echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 9 additions and 4 deletions.
@@ -14,6 +14,7 @@ $mysql_deps = <<SCRIPT

    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf
    DEV_PASSWORD=hadoop

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

@@ -33,7 +34,7 @@ $mysql_deps = <<SCRIPT

    chkconfig mysqld on \
      && service mysqld start \
      && /usr/bin/mysqladmin -u root password "${DEV_PASSWORD}" \
      && echo "# vagrant provisioned" >> ${MY_CNF}

    fi

@@ -112,12 +113,16 @@ $system_config = <<SCRIPT

    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop
    PROXY_CONFIG=/etc/profile.d/proxy.sh

    service iptables stop && chkconfig iptables off

    if grep ryanair /etc/resolv.conf; then
      echo "export http_proxy=http://internalproxy.corp.ryanair.com:3128" > ${PROXY_CONFIG} \
        && echo "export https_proxy=http://internalproxy.corp.ryanair.com:3128" >> ${PROXY_CONFIG}
    else
      rm -fR ${PROXY_CONFIG}
    fi

    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 93 additions and 8 deletions.
@@ -3,19 +3,96 @@ $anaconda_deps = <<SCRIPT

    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh

    if [ ! -d "/usr/local/anaconda" ]; then
      echo "Anaconda installation..." \
        && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
        && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi

SCRIPT

$mysql_deps = <<SCRIPT

    MYSQL_REPO=https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm
    MY_CNF=/etc/my.cnf

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] && rpm -ivh ${MYSQL_REPO}

    yum install -y mysql-community-server

    if [ -e /etc/init.d/mysqld ] && [ -z "$(grep -R vagrant ${MY_CNF})" ]; then
      echo "# InnoDB settings" >> ${MY_CNF}
      echo "innodb_file_per_table = 1" >> ${MY_CNF}
      echo "innodb_flush_log_at_trx_commit = 2" >> ${MY_CNF}
      echo "innodb_log_buffer_size = 64M" >> ${MY_CNF}
      echo "innodb_buffer_pool_size = 1G" >> ${MY_CNF}
      echo "innodb_thread_concurrency = 8" >> ${MY_CNF}
      echo "innodb_flush_method = O_DIRECT" >> ${MY_CNF}
      echo "innodb_log_file_size = 512M" >> ${MY_CNF}
      echo "explicit_defaults_for_timestamp = 1" >> ${MY_CNF}

      chkconfig mysqld on \
        && service mysqld start \
        && /usr/bin/mysqladmin -u root password 'hadoop' \
        && echo "# vagrant provisioned" >> ${MY_CNF}
    fi

SCRIPT

$spark_deps = <<SCRIPT

    SPARK_TGZ=spark-1.5.1-bin-without-hadoop.tgz
    SPARK_LINK=/opt/spark

    [ ! -e ${SPARK_LINK} ] \
      && echo "Spark installation..." \
      && wget http://ftp.heanet.ie/mirrors/www.apache.org/dist/spark/spark-1.5.1/${SPARK_TGZ} -q -P /opt/ \
      && tar zxf /opt/${SPARK_TGZ} -C /opt/ \
      && ln -s /opt/spark-1.5.1-bin-without-hadoop ${SPARK_LINK}

    [ ! -e /opt/${SPARK_TGZ} ] && exit 1

    echo "export PATH=\$PATH:${SPARK_LINK}/bin" > /etc/init.d/spark.sh

    echo "HADOOP_CONF_DIR=/etc/hadoop/conf/" > /opt/spark/conf/spark-env.sh
    echo "SPARK_DIST_CLASSPATH=$(hadoop classpath)" >> /opt/spark/conf/spark-env.sh
    echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/" >> ${SPARK_LINK}/conf/spark-env.sh

    cat << SPCNF > ${SPARK_LINK}/conf/spark-defaults.conf
    spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-hadoop2.6.0.jar
    spark.shuffle.service.enabled true

    # Execution Behavior
    spark.broadcast.blockSize 4096

    # Dynamic Resource Allocation (YARN)
    spark.dynamicAllocation.enabled true
    spark.speculation true
    spark.scheduler.mode FAIR
    spark.kryoserializer.buffer.max 1000m
    spark.driver.maxResultSize 0
    spark.serializer org.apache.spark.serializer.KryoSerializer
    spark.yarn.preserve.staging.files false
    spark.master yarn
    spark.rdd.compress true

    # Local execution of selected Spark functions
    spark.localExecution.enabled true
    spark.sql.parquet.binaryAsString true
    spark.sql.parquet.compression.codec snappy

    # use lz4 compression for broadcast variables as Snappy is not supported on MacOSX
    spark.broadcast.compress true
    spark.io.compression.codec lz4
    spark.driver.extraLibraryPath /opt/cloudera/parcels/CDH/lib/hadoop/lib/native
    spark.executor.extraLibraryPath /opt/cloudera/parcels/CDH/lib/hadoop/lib/native
    spark.executor.extraJavaOptions -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+UseCompressedOops
    spark.driver.extraJavaOptions -XX:+UseCompressedOops -XX:MaxPermSize=1g
    spark.executor.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    spark.driver.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin:/opt/udfs/hive/*.jar
    SPCNF

    echo "Add hive-site.xml configuration here !!!"

SCRIPT

$cloudera_deps = <<SCRIPT

@@ -32,10 +109,16 @@ $cloudera_deps = <<SCRIPT

SCRIPT

$system_config = <<SCRIPT

    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop

    service iptables stop && chkconfig iptables off

    [ ! -e /etc/profile.d/proxy.sh ] \
      && echo "export http_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh \
      && echo "export https_proxy=http://internalproxy.corp.ryanair.com:3128" >> /etc/profile.d/proxy.sh

    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
      echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \

@@ -47,19 +130,21 @@ SCRIPT

Vagrant.configure(2) do |config|

  config.vm.box = "boxcutter/centos66"
  config.vm.hostname = "cdh.instance.com"
  config.vm.network :public_network, :bridge => "en3: Thunderbolt Ethernet", :mac => "0800DEADBEEF"

  config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--nicpromisc2", "allow-all"]
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
  end

  config.vm.provision :shell, :name => "system_config", :inline => $system_config
  config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
  config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
  config.vm.provision :shell, :name => "spark_deps", :inline => $spark_deps
  config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps

end
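A quick check of the MySQL provisioning in this revision; a sketch, where 'hadoop' is the root password hard-coded above:

    # Verify mysqld is running and the InnoDB settings were picked up.
    mysqladmin -u root -phadoop status
    mysql -u root -phadoop -e "SHOW VARIABLES LIKE 'innodb_buffer_pool_size'"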
bartekdobija revised this gist
Oct 19, 2015 · 1 changed file with 11 additions and 3 deletions.
@@ -10,14 +10,21 @@ $anaconda_deps = <<SCRIPT

SCRIPT

$mysql_deps = <<SCRIPT

    [ ! -e /etc/yum.repos.d/mysql-community.repo ] \
      && rpm -ivh https://dev.mysql.com/get/mysql-community-release-el6-5.noarch.rpm

    yum install -y mysql-community-server

SCRIPT

$cloudera_deps = <<SCRIPT

    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo

    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \
      && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/

    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*

@@ -44,13 +51,14 @@ Vagrant.configure(2) do |config|

  config.vm.network :public_network, bridge: "en0: Wi-Fi (AirPort)"

  config.vm.provider "virtualbox" do |vb|
    vb.name = "cloudera-hadoop"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
  end

  config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
  config.vm.provision :shell, :name => "mysql_deps", :inline => $mysql_deps
  config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
  config.vm.provision :shell, :name => "system_config", :inline => $system_config
bartekdobija revised this gist
Oct 18, 2015 · 1 changed file with 1 addition and 1 deletion.
@@ -20,7 +20,7 @@ $cloudera_deps = <<SCRIPT

      && yum clean all

    # Cloudera Hadoop installation
    yum install -y java-1.7.0-openjdk java-1.7.0-openjdk-devel hadoop hadoop-conf-pseudo hadoop-hdfs-datanode hadoop-hdfs-journalnode hadoop-hdfs-namenode hadoop-hdfs-secondarynamenode hadoop-hdfs-zkfc hadoop-libhdfs-devel hadoop-mapreduce-historyserver hadoop-yarn-nodemanager hadoop-yarn-resourcemanager zookeeper zookeeper-native zookeeper-server oozie oozie-client kite sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*

SCRIPT
bartekdobija revised this gist
Oct 18, 2015 · 1 changed file with 22 additions and 10 deletions.
@@ -1,15 +1,18 @@

$anaconda_deps = <<SCRIPT

    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh

    if [ ! -d "/usr/local/anaconda" ]; then
      echo "Installing Anaconda..." \
        && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
        && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda
    fi

SCRIPT

$cloudera_deps = <<SCRIPT

    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo

    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \

@@ -19,6 +22,13 @@ $dependency_installation = <<SCRIPT

    # Cloudera Hadoop installation
    yum install -y hadoop zookeeper oozie sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*

SCRIPT

$system_config = <<SCRIPT

    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop

    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
      echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \

@@ -40,6 +50,8 @@ Vagrant.configure(2) do |config|

    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
  end

  config.vm.provision :shell, :name => "anaconda_deps", :inline => $anaconda_deps
  config.vm.provision :shell, :name => "cloudera_deps", :inline => $cloudera_deps
  config.vm.provision :shell, :name => "system_config", :inline => $system_config

end
bartekdobija created this gist
Oct 18, 2015
@@ -0,0 +1,45 @@

$dependency_installation = <<SCRIPT

    CLOUDERA_REPO=http://archive.cloudera.com/cdh5/redhat/6/x86_64/cdh/cloudera-cdh5.repo
    ANACONDA_INSTALLER=https://3230d63b5fc54e62148e-c95ac804525aac4b6dba79b00b39d1d3.ssl.cf1.rackcdn.com/Anaconda-2.3.0-Linux-x86_64.sh
    DEV_USER=hadoop_oozie
    DEV_PASSWORD=hadoop

    # Anaconda installation
    [ ! -d "/usr/local/anaconda" ] \
      && echo "Installing Anaconda..." \
      && wget ${ANACONDA_INSTALLER} -q -P /tmp/ \
      && bash /tmp/Anaconda-2.3.0-Linux-x86_64.sh -b -f -p /usr/local/anaconda

    # Add Cloudera repository
    [ ! -e /etc/yum.repos.d/cloudera-cdh5.repo ] \
      && wget ${CLOUDERA_REPO} -q -P /etc/yum.repos.d/ \
      && yum clean all

    # Cloudera Hadoop installation
    yum install -y hadoop zookeeper oozie sqoop hive hive-metastore hive-server2 hive-hcatalog hive-jdbc avro-libs pig kite impala*

    # Add a dev user - don't worry about the password
    if ! grep ${DEV_USER} /etc/passwd; then
      echo "Creating user ${DEV_USER}" && useradd -p $(openssl passwd -1 ${DEV_PASSWORD}) ${DEV_USER} \
        && echo "${DEV_USER} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/hadoop_oozie
    fi

SCRIPT

Vagrant.configure(2) do |config|

  config.vm.box = "boxcutter/centos66"
  config.vm.hostname = "cdh.home.com"
  config.vm.network :public_network, bridge: "en0: Wi-Fi (AirPort)"

  config.vm.provider "virtualbox" do |vb|
    vb.name = "vagrant-cdh"
    vb.cpus = 4
    vb.memory = 8192
    vb.customize ["modifyvm", :id, "--cpuexecutioncap", "100"]
  end

  config.vm.provision :shell, :name => "dep_installer", :inline => $dependency_installation

end
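To try any revision of this Vagrantfile, the usual Vagrant workflow applies; a sketch, assuming VirtualBox and Vagrant are installed on the host and the gist is saved as Vagrantfile in the current directory:

    vagrant up          # boot the CentOS guest and run the inline provisioners
    vagrant provision   # re-apply the provisioners after editing the file
    vagrant ssh         # log in to inspect the services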