Last active
October 9, 2018 03:31
-
-
Save andershammar/224e1077021d0ea376dd to your computer and use it in GitHub Desktop.
Revisions
-
andershammar revised this gist
Sep 18, 2015 . No changes.There are no files selected for viewing
-
andershammar revised this gist
Sep 18, 2015 . No changes.There are no files selected for viewing
-
andershammar revised this gist
Sep 18, 2015 . No changes.There are no files selected for viewing
-
andershammar revised this gist
Sep 18, 2015 . No changes.There are no files selected for viewing
-
andershammar revised this gist
Sep 18, 2015 . 1 changed file with 75 additions and 58 deletions.There are no files selected for viewing
#!/bin/bash
#
# Amazon EMR bootstrap action (revision of Sep 18, 2015).
#
# On the master node only: installs Git and Maven, builds Apache Zeppelin from
# source (Spark 1.4 / Hadoop 2.6 / YARN profiles), writes conf/zeppelin-env.sh,
# patches bin/interpreter.sh so the Hadoop and EMRFS jars are on the
# interpreter classpath, and starts the Zeppelin daemon.
#
# Needs on PATH: jq, wget, sudo, yum.

# Options are set here rather than in the shebang so they still apply when
# the script is launched as `bash script.sh`.
set -ex

readonly INSTANCE_INFO=/mnt/var/lib/info/instance.json
readonly MAVEN_VERSION=3.3.3
readonly MAVEN_TARBALL="apache-maven-${MAVEN_VERSION}-bin.tar.gz"
readonly MAVEN_URL="http://apache.mirrors.spacedump.net/maven/maven-3/${MAVEN_VERSION}/binaries/${MAVEN_TARBALL}"
readonly ZEPPELIN_DIR=/home/hadoop/zeppelin
readonly SPARK_DEFAULTS=/usr/lib/spark/conf/spark-defaults.conf
readonly CLASSPATH_PATCH=0001-Add-Hadoop-libraries-and-EMRFS-to-Zeppelin-classpath.patch

#######################################
# Succeeds only when the instance metadata file reports isMaster == true.
# A missing or unreadable file yields an empty string, i.e. "not master".
#######################################
is_master() {
  [[ "$(jq -r .isMaster "${INSTANCE_INFO}")" == "true" ]]
}

#######################################
# Prints "-D<key>=<value>" for every line of a spark-defaults style file whose
# FIRST whitespace-separated field is exactly <key>. Commented-out lines and
# longer keys that merely contain <key> are ignored.
# Arguments: $1 - property name, $2 - path to the properties file
# Outputs:   zero or more "-Dkey=value" lines on stdout
#######################################
spark_conf_opt() {
  local key=$1
  local conf=$2
  awk -v key="${key}" '$1 == key { print "-D" $1 "=" $2 }' "${conf}"
}

#######################################
# Downloads Maven, unpacks it under /opt/apache-maven and puts it on PATH,
# both persistently (via ~hadoop/.bashrc) and for the rest of this script.
#######################################
install_maven() {
  wget -P /tmp "${MAVEN_URL}"
  # -p: do not abort (set -e) if the directory is already there.
  sudo mkdir -p /opt/apache-maven
  sudo tar -xvzf "/tmp/${MAVEN_TARBALL}" -C /opt/apache-maven

  # \$ keeps MAVEN_HOME/PATH unexpanded so .bashrc evaluates them at login.
  cat <<EOF >> /home/hadoop/.bashrc

# Maven
export MAVEN_HOME=/opt/apache-maven/apache-maven-${MAVEN_VERSION}
export PATH=\$MAVEN_HOME/bin:\$PATH
EOF

  source /home/hadoop/.bashrc
}

#######################################
# Clones Zeppelin and builds it; leaves the working directory in the checkout,
# which the following steps rely on (they use relative paths).
#######################################
build_zeppelin() {
  git clone https://github.com/apache/incubator-zeppelin.git "${ZEPPELIN_DIR}"
  cd "${ZEPPELIN_DIR}"
  mvn clean package -Pspark-1.4 -Dhadoop.version=2.6.0 -Phadoop-2.6 -Pyarn -DskipTests
}

#######################################
# Creates conf/zeppelin-env.sh from the template and appends the EMR settings,
# forwarding selected executor settings from spark-defaults.conf as -D options.
#######################################
write_zeppelin_env() {
  local -a java_opts=()
  local key opt

  if [[ -f "${SPARK_DEFAULTS}" ]]; then
    for key in \
      spark.executor.instances \
      spark.executor.cores \
      spark.executor.memory \
      spark.default.parallelism \
      spark.yarn.executor.memoryOverhead; do
      while IFS= read -r opt; do
        java_opts+=("${opt}")
      done < <(spark_conf_opt "${key}" "${SPARK_DEFAULTS}")
    done
  fi

  echo "${java_opts[*]}"

  cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh

  # java_opts is expanded now; PYTHONPATH is escaped so that it is resolved
  # when Zeppelin sources the file, and no empty entry is added when unset.
  cat <<EOF >> conf/zeppelin-env.sh
export MASTER=yarn-client
export HADOOP_HOME=/usr/lib/hadoop
export HADOOP_CONF_DIR=/etc/hadoop/conf
export ZEPPELIN_SPARK_USEHIVECONTEXT=false
export ZEPPELIN_JAVA_OPTS="${java_opts[*]}"
export PYTHONPATH=\${PYTHONPATH:+\$PYTHONPATH:}/usr/lib/spark/python
EOF
}

#######################################
# Writes the classpath patch into the checkout and applies it with `git am`.
# NOTE(review): the patch text below was re-laid-out from a whitespace-collapsed
# copy; `git am` needs context lines to match upstream bin/interpreter.sh
# exactly, so confirm the indentation against the checked-out file. The From:
# address is kept as found (it looks redacted).
#######################################
apply_emr_classpath_patch() {
  cat <<'EOF' > "${CLASSPATH_PATCH}"
From 2b0226e45207758d526522bd22d497c9def7c008 Mon Sep 17 00:00:00 2001
From: Anders Hammar <[email protected]>
Date: Fri, 18 Sep 2015 10:24:18 +0000
Subject: [PATCH] Add Hadoop libraries and EMRFS to Zeppelin classpath

---
 bin/interpreter.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/bin/interpreter.sh b/bin/interpreter.sh
index e03a13b..de458f2 100755
--- a/bin/interpreter.sh
+++ b/bin/interpreter.sh
@@ -89,8 +89,21 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
     # CDH
     addJarInDir "${HADOOP_HOME}"
     addJarInDir "${HADOOP_HOME}/lib"
+
+    # Hadoop libraries
+    addJarInDir "${HADOOP_HOME}/../hadoop-hdfs"
+    addJarInDir "${HADOOP_HOME}/../hadoop-mapreduce"
+    addJarInDir "${HADOOP_HOME}/../hadoop-yarn"
+
+    # Hadoop LZO
+    addJarInDir "${HADOOP_HOME}/../hadoop-lzo/lib"
   fi
 
+  # Add EMRFS libraries
+  addJarInDir "/usr/share/aws/emr/emrfs/conf"
+  addJarInDir "/usr/share/aws/emr/emrfs/lib"
+  addJarInDir "/usr/share/aws/emr/emrfs/auxlib"
+
   addJarInDir "${INTERPRETER_DIR}/dep"
 
   PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
-- 
2.1.0

EOF

  # `git am` refuses to commit without an identity; these are placeholders.
  git config user.email "you@example.com"
  git config user.name "Your Name"
  git am "${CLASSPATH_PATCH}"
}

main() {
  # Install Git
  sudo yum -y install git

  # Install Maven
  install_maven

  # Install Zeppelin
  build_zeppelin

  # Configure Zeppelin
  write_zeppelin_env
  apply_emr_classpath_patch

  # Start the Zeppelin daemon
  bin/zeppelin-daemon.sh start
}

if is_master; then
  main
fi
andershammar created this gist
Jun 26, 2015 .There are no files selected for viewing
#!/bin/bash
#
# Amazon EMR bootstrap action (initial revision, Jun 26, 2015).
#
# Installs Git and Maven, builds Apache Zeppelin from source (Spark 1.3 /
# Hadoop 2.4 / YARN profiles), writes conf/zeppelin-env.sh, patches
# bin/common.sh so the EMR jars shipped under $SPARK_HOME/classpath are on the
# Zeppelin classpath, and starts the Zeppelin daemon.

# Options are set here rather than in the shebang so they still apply when
# the script is launched as `bash script.sh`.
set -ex

# Install Git
sudo yum -y install git

# Install Maven
wget -P /tmp http://apache.mirrors.spacedump.net/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
# -p: do not abort (set -e) if the directory is already there.
sudo mkdir -p /opt/apache-maven
sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven

# \$ keeps MAVEN_HOME/PATH unexpanded so .bashrc evaluates them at login.
cat <<EOF >> /home/hadoop/.bashrc

# Maven
export MAVEN_HOME=/opt/apache-maven/apache-maven-3.3.3
export PATH=\$MAVEN_HOME/bin:\$PATH
EOF

source /home/hadoop/.bashrc

# Install Zeppelin
git clone https://github.com/apache/incubator-zeppelin.git /home/hadoop/zeppelin
cd /home/hadoop/zeppelin
mvn clean package -Pspark-1.3 -Dhadoop.version=2.4.0 -Phadoop-2.4 -Pyarn -DskipTests

# Configure Zeppelin
SPARK_DEFAULTS=/home/hadoop/spark/conf/spark-defaults.conf

# Forward selected executor settings from spark-defaults.conf as -D options.
# The key must equal the first field exactly, so commented-out lines and
# longer keys that merely contain it are not picked up.
declare -a ZEPPELIN_JAVA_OPTS=()
if [[ -f "${SPARK_DEFAULTS}" ]]; then
  for spark_key in \
    spark.executor.instances \
    spark.executor.cores \
    spark.executor.memory \
    spark.default.parallelism; do
    while IFS= read -r spark_opt; do
      ZEPPELIN_JAVA_OPTS+=("${spark_opt}")
    done < <(awk -v key="${spark_key}" '$1 == key { print "-D" $1 "=" $2 }' "${SPARK_DEFAULTS}")
  done
fi

echo "${ZEPPELIN_JAVA_OPTS[*]}"

cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh

# Unquoted heredoc: HADOOP_CONF_DIR and the options array are expanded now,
# so the generated file records the values seen by this script.
cat <<EOF >> conf/zeppelin-env.sh
export MASTER=yarn-client
export HADOOP_CONF_DIR=$HADOOP_CONF_DIR
export ZEPPELIN_SPARK_USEHIVECONTEXT=false
export ZEPPELIN_JAVA_OPTS="${ZEPPELIN_JAVA_OPTS[*]}"
EOF

# NOTE(review): the patch text below was re-laid-out from a whitespace-collapsed
# copy; `git am` needs context lines to match upstream bin/common.sh exactly,
# so confirm the indentation against the checked-out file. The From: address
# is kept as found (it looks redacted).
cat <<'EOF' > 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch
From 5bad22dd3681305f081233cbecea5a55bf3dcc7d Mon Sep 17 00:00:00 2001
From: Anders Hammar <[email protected]>
Date: Wed, 24 Jun 2015 15:09:02 +0200
Subject: [PATCH] Add Amazon EMR jars to Zeppelin classpath

---
 bin/common.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bin/common.sh b/bin/common.sh
index 8087e9d..69e09d4 100644
--- a/bin/common.sh
+++ b/bin/common.sh
@@ -86,6 +86,8 @@ function addJarInDir(){
 
 if [[ ! -z "${SPARK_HOME}" ]] && [[ -d "${SPARK_HOME}" ]]; then
   addJarInDir "${SPARK_HOME}"
+  addJarInDir "${SPARK_HOME}/classpath/emr"
+  addJarInDir "${SPARK_HOME}/classpath/emrfs"
 fi
 
 if [[ ! -z "${HADOOP_HOME}" ]] && [[ -d "${HADOOP_HOME}" ]]; then
-- 
1.8.2.2

EOF

# `git am` refuses to commit without an identity; these are placeholders.
git config user.email "you@example.com"
git config user.name "Your Name"
git am 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch

# Start the Zeppelin daemon
bin/zeppelin-daemon.sh start