Skip to content

Instantly share code, notes, and snippets.

@andershammar
Last active October 9, 2018 03:31
Show Gist options
  • Save andershammar/224e1077021d0ea376dd to your computer and use it in GitHub Desktop.
Save andershammar/224e1077021d0ea376dd to your computer and use it in GitHub Desktop.

Revisions

  1. andershammar revised this gist Sep 18, 2015. No changes.
  2. andershammar revised this gist Sep 18, 2015. No changes.
  3. andershammar revised this gist Sep 18, 2015. No changes.
  4. andershammar revised this gist Sep 18, 2015. No changes.
  5. andershammar revised this gist Sep 18, 2015. 1 changed file with 75 additions and 58 deletions.
    133 changes: 75 additions & 58 deletions install-apache-zeppelin-on-amazon-emr.sh
    Original file line number Diff line number Diff line change
    @@ -1,82 +1,99 @@
    #!/bin/bash -ex

    # Install Git
    sudo yum -y install git
    if [ "$(cat /mnt/var/lib/info/instance.json | jq -r .isMaster)" == "true" ]; then
    # Install Git
    sudo yum -y install git

    # Install Maven
    wget -P /tmp http://apache.mirrors.spacedump.net/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
    sudo mkdir /opt/apache-maven
    sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven

    cat <<EOF >> /home/hadoop/.bashrc
    # Install Maven
    wget -P /tmp http://apache.mirrors.spacedump.net/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
    sudo mkdir /opt/apache-maven
    sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven
    cat <<EOF >> /home/hadoop/.bashrc
    # Maven
    export MAVEN_HOME=/opt/apache-maven/apache-maven-3.3.3
    export PATH=\$MAVEN_HOME/bin:\$PATH
    EOF
    source /home/hadoop/.bashrc

    source /home/hadoop/.bashrc

    # Install Zeppelin
    git clone https://github.com/apache/incubator-zeppelin.git /home/hadoop/zeppelin
    cd /home/hadoop/zeppelin
    mvn clean package -Pspark-1.3 -Dhadoop.version=2.4.0 -Phadoop-2.4 -Pyarn -DskipTests
    # Install Zeppelin
    git clone https://github.com/apache/incubator-zeppelin.git /home/hadoop/zeppelin
    cd /home/hadoop/zeppelin
    mvn clean package -Pspark-1.4 -Dhadoop.version=2.6.0 -Phadoop-2.6 -Pyarn -DskipTests

    # Configure Zeppelin
    SPARK_DEFAULTS=/home/hadoop/spark/conf/spark-defaults.conf

    declare -a ZEPPELIN_JAVA_OPTS
    if [ -f $SPARK_DEFAULTS ]; then
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.instances $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.cores $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.memory $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.default.parallelism $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    fi
    echo "${ZEPPELIN_JAVA_OPTS[@]}"
    # Configure Zeppelin
    SPARK_DEFAULTS=/usr/lib/spark/conf/spark-defaults.conf
    declare -a ZEPPELIN_JAVA_OPTS
    if [ -f $SPARK_DEFAULTS ]; then
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.instances $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.cores $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.executor.memory $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.default.parallelism $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    ZEPPELIN_JAVA_OPTS=("${ZEPPELIN_JAVA_OPTS[@]}" \
    $(grep spark.yarn.executor.memoryOverhead $SPARK_DEFAULTS | awk '{print "-D" $1 "=" $2}'))
    fi
    echo "${ZEPPELIN_JAVA_OPTS[@]}"

    cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh
    cat <<EOF >> conf/zeppelin-env.sh
    cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh
    cat <<EOF >> conf/zeppelin-env.sh
    export MASTER=yarn-client
    export HADOOP_CONF_DIR=$HADOOP_CONF_DIR
    export HADOOP_HOME=/usr/lib/hadoop
    export HADOOP_CONF_DIR=/etc/hadoop/conf
    export ZEPPELIN_SPARK_USEHIVECONTEXT=false
    export ZEPPELIN_JAVA_OPTS="${ZEPPELIN_JAVA_OPTS[@]}"
    export PYTHONPATH=$PYTHONPATH:/usr/lib/spark/python
    EOF

    cat <<'EOF' > 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch
    From 5bad22dd3681305f081233cbecea5a55bf3dcc7d Mon Sep 17 00:00:00 2001
    cat <<'EOF' > 0001-Add-Hadoop-libraries-and-EMRFS-to-Zeppelin-classpath.patch
    From 2b0226e45207758d526522bd22d497c9def7c008 Mon Sep 17 00:00:00 2001
    From: Anders Hammar <[email protected]>
    Date: Wed, 24 Jun 2015 15:09:02 +0200
    Subject: [PATCH] Add Amazon EMR jars to Zeppelin classpath
    Date: Fri, 18 Sep 2015 10:24:18 +0000
    Subject: [PATCH] Add Hadoop libraries and EMRFS to Zeppelin classpath
    ---
    bin/common.sh | 2 ++
    1 file changed, 2 insertions(+)
    bin/interpreter.sh | 13 +++++++++++++
    1 file changed, 13 insertions(+)
    diff --git a/bin/common.sh b/bin/common.sh
    index 8087e9d..69e09d4 100644
    --- a/bin/common.sh
    +++ b/bin/common.sh
    @@ -86,6 +86,8 @@ function addJarInDir(){
    if [[ ! -z "${SPARK_HOME}" ]] && [[ -d "${SPARK_HOME}" ]]; then
    addJarInDir "${SPARK_HOME}"
    + addJarInDir "${SPARK_HOME}/classpath/emr"
    + addJarInDir "${SPARK_HOME}/classpath/emrfs"
    fi
    if [[ ! -z "${HADOOP_HOME}" ]] && [[ -d "${HADOOP_HOME}" ]]; then
    --
    1.8.2.2
    diff --git a/bin/interpreter.sh b/bin/interpreter.sh
    index e03a13b..de458f2 100755
    --- a/bin/interpreter.sh
    +++ b/bin/interpreter.sh
    @@ -89,8 +89,21 @@ if [[ "${INTERPRETER_ID}" == "spark" ]]; then
    # CDH
    addJarInDir "${HADOOP_HOME}"
    addJarInDir "${HADOOP_HOME}/lib"
    +
    + # Hadoop libraries
    + addJarInDir "${HADOOP_HOME}/../hadoop-hdfs"
    + addJarInDir "${HADOOP_HOME}/../hadoop-mapreduce"
    + addJarInDir "${HADOOP_HOME}/../hadoop-yarn"
    +
    + # Hadoop LZO
    + addJarInDir "${HADOOP_HOME}/../hadoop-lzo/lib"
    fi
    + # Add EMRFS libraries
    + addJarInDir "/usr/share/aws/emr/emrfs/conf"
    + addJarInDir "/usr/share/aws/emr/emrfs/lib"
    + addJarInDir "/usr/share/aws/emr/emrfs/auxlib"
    +
    addJarInDir "${INTERPRETER_DIR}/dep"
    PYSPARKPATH="${ZEPPELIN_HOME}/interpreter/spark/pyspark/pyspark.zip:${ZEPPELIN_HOME}/interpreter/spark/pyspark/py4j-0.8.2.1-src.zip"
    --
    2.1.0
    EOF
    git config user.email "[email protected]"
    git config user.name "Your Name"
    git am 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch
    git config user.email "[email protected]"
    git config user.name "Your Name"
    git am 0001-Add-Hadoop-libraries-and-EMRFS-to-Zeppelin-classpath.patch

    # Start the Zeppelin daemon
    bin/zeppelin-daemon.sh start
    # Start the Zeppelin daemon
    bin/zeppelin-daemon.sh start
    fi
  6. andershammar created this gist Jun 26, 2015.
    82 changes: 82 additions & 0 deletions install-apache-zeppelin-on-amazon-emr.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,82 @@
    #!/bin/bash
    #
    # Installs Apache Zeppelin from source on an Amazon EMR node:
    #   1. installs Git and Maven 3.3.3,
    #   2. clones and builds Zeppelin under /home/hadoop/zeppelin,
    #   3. writes conf/zeppelin-env.sh using executor settings copied from
    #      Spark's spark-defaults.conf,
    #   4. patches bin/common.sh so the EMR jar directories are on the classpath,
    #   5. starts the Zeppelin daemon.

    # Abort on the first failing command and trace every command. The options
    # are set here rather than on the shebang line so they still apply when the
    # script is started as "bash <script>".
    set -ex

    # Install Git
    sudo yum -y install git

    # Install Maven
    wget -P /tmp http://apache.mirrors.spacedump.net/maven/maven-3/3.3.3/binaries/apache-maven-3.3.3-bin.tar.gz
    # -p keeps a re-run from aborting (under set -e) when the directory exists.
    sudo mkdir -p /opt/apache-maven
    sudo tar -xvzf /tmp/apache-maven-3.3.3-bin.tar.gz -C /opt/apache-maven

    # Persist the Maven environment for the hadoop user. The escaped \$ keeps
    # the variable references literal in .bashrc instead of expanding them now.
    cat <<EOF >> /home/hadoop/.bashrc
    # Maven
    export MAVEN_HOME=/opt/apache-maven/apache-maven-3.3.3
    export PATH=\$MAVEN_HOME/bin:\$PATH
    EOF

    # Pick up MAVEN_HOME and PATH in this shell so "mvn" resolves below.
    source /home/hadoop/.bashrc

    # Install Zeppelin
    git clone https://github.com/apache/incubator-zeppelin.git /home/hadoop/zeppelin
    cd /home/hadoop/zeppelin
    mvn clean package -Pspark-1.3 -Dhadoop.version=2.4.0 -Phadoop-2.4 -Pyarn -DskipTests

    # Configure Zeppelin
    SPARK_DEFAULTS=/home/hadoop/spark/conf/spark-defaults.conf

    # Turn selected spark-defaults.conf entries into -Dkey=value JVM options.
    # The first field must equal the key exactly, so commented-out lines and
    # longer keys that merely contain the name are not picked up.
    declare -a ZEPPELIN_JAVA_OPTS
    if [[ -f "$SPARK_DEFAULTS" ]]; then
      for spark_key in \
        spark.executor.instances \
        spark.executor.cores \
        spark.executor.memory \
        spark.default.parallelism; do
        while IFS= read -r java_opt; do
          ZEPPELIN_JAVA_OPTS+=("$java_opt")
        done < <(awk -v key="$spark_key" \
          '$1 == key {print "-D" $1 "=" $2}' "$SPARK_DEFAULTS")
      done
    fi
    # Log the collected options (space-separated on one line).
    printf '%s\n' "${ZEPPELIN_JAVA_OPTS[*]}"

    # Start from the shipped template and append the site settings. The
    # heredoc delimiter is unquoted, so $HADOOP_CONF_DIR and the option array
    # are expanded now and written to the file as literal values.
    cp conf/zeppelin-env.sh.template conf/zeppelin-env.sh
    cat <<EOF >> conf/zeppelin-env.sh
    export MASTER=yarn-client
    export HADOOP_CONF_DIR=$HADOOP_CONF_DIR
    export ZEPPELIN_SPARK_USEHIVECONTEXT=false
    export ZEPPELIN_JAVA_OPTS="${ZEPPELIN_JAVA_OPTS[@]}"
    EOF

    # Write the classpath patch to disk. The quoted 'EOF' delimiter disables
    # expansion so the ${...} references inside the patch stay literal.
    cat <<'EOF' > 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch
    From 5bad22dd3681305f081233cbecea5a55bf3dcc7d Mon Sep 17 00:00:00 2001
    From: Anders Hammar <[email protected]>
    Date: Wed, 24 Jun 2015 15:09:02 +0200
    Subject: [PATCH] Add Amazon EMR jars to Zeppelin classpath
    ---
    bin/common.sh | 2 ++
    1 file changed, 2 insertions(+)
    diff --git a/bin/common.sh b/bin/common.sh
    index 8087e9d..69e09d4 100644
    --- a/bin/common.sh
    +++ b/bin/common.sh
    @@ -86,6 +86,8 @@ function addJarInDir(){
    if [[ ! -z "${SPARK_HOME}" ]] && [[ -d "${SPARK_HOME}" ]]; then
    addJarInDir "${SPARK_HOME}"
    + addJarInDir "${SPARK_HOME}/classpath/emr"
    + addJarInDir "${SPARK_HOME}/classpath/emrfs"
    fi
    if [[ ! -z "${HADOOP_HOME}" ]] && [[ -d "${HADOOP_HOME}" ]]; then
    --
    1.8.2.2
    EOF
    # "git am" creates a commit, so a committer identity is configured first.
    git config user.email "[email protected]"
    git config user.name "Your Name"
    git am 0001-Add-Amazon-EMR-jars-to-Zeppelin-classpath.patch

    # Start the Zeppelin daemon
    bin/zeppelin-daemon.sh start