Skip to content

Instantly share code, notes, and snippets.

@prateek
Created August 14, 2014 19:55
Show Gist options
  • Save prateek/a5afdc33d8892e6ca42f to your computer and use it in GitHub Desktop.
Save prateek/a5afdc33d8892e6ca42f to your computer and use it in GitHub Desktop.

Revisions

  1. prateek revised this gist Aug 14, 2014. No changes.
  2. prateek revised this gist Aug 14, 2014. 1 changed file with 20 additions and 1 deletion.
    21 changes: 20 additions & 1 deletion rmr-install.md
    Original file line number Diff line number Diff line change
    @@ -133,4 +133,23 @@ sudo /usr/sbin/rstudio-server start
    ```

    # Browser
    go to "edge-host:8787"
    go to "edge-host:8787"

    # Check if RMR is working - example job
    ```r
    # in rstudio
    # Point the session at the Hadoop launcher and the streaming jar.
    Sys.setenv(
      HADOOP_CMD = "/usr/bin/hadoop",
      HADOOP_STREAMING = "/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar"
    )
    library(rmr2)

    # Per-job Hadoop settings passed through backend.parameters.
    hadoop.opts <- list(
      D = "mapreduce.map.memory.mb=8192",
      D = "mapreduce.reduce.memory.mb=8192"
    )

    # Write 1..1000 to the DFS, then run a job whose map step pairs each value with its square
    # (no reduce function is supplied).
    small.ints <- to.dfs(1:1000)
    result <- mapreduce(
      input = small.ints,
      map = function(k, v) cbind(v, v^2),
      backend.parameters = list(hadoop = hadoop.opts)
    )

    # Read the job output back into the R session.
    from.dfs(result)
    ```
  3. prateek created this gist Aug 14, 2014.
    136 changes: 136 additions & 0 deletions rmr-install.md
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,136 @@
    # Install Steps for RMR
    These steps have been tested with:
    - Oracle Linux 6.4
    - RHEL 6.5
    - CDH 5.1

    *Note:* I wish this were packaged up as a parcel.

    # Step 0 (all nodes)
    ```sh
    # Open a root shell via sudo, then change to /tmp.
    sudo bash
    cd /tmp
    ```

    # enable epel repo (all nodes)
    ```sh
    # Download the EPEL 6 release package and install it with rpm.
    # NOTE(review): confirm this URL still resolves; EPEL 6 packages may have moved to an archive mirror.
    wget http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
    rpm -ivh epel-release-6-8.noarch.rpm
    ```

    # prereqs (all nodes)
    ```sh
    # Install the build prerequisites in one yum transaction: compilers, -devel headers,
    # and a few dependencies requested by shared-library / Perl-module capability name.
    yum install -y xdg-utils bzip2-devel gcc-c++ gcc-gfortran libX11-devel pcre-devel \
    tcl-devel tk-devel zlib-devel readline-devel libXt-devel libpng-devel cairo-devel \
    pango-devel 'libXmu.so.6()(64bit)' 'libgfortran.so.1()(64bit)' 'perl(File::Copy::Recursive)'
    ```

    # R install (all nodes)
    ```sh
    cd /tmp/
    # Fetch and unpack the R 2.15.3 source tarball.
    wget http://cran.rstudio.com/src/base/R-2/R-2.15.3.tar.gz
    tar xvfz R-2.15.3.tar.gz
    cd R-2.15.3
    # Configure with the shared R library enabled, without X, and with cairo;
    # make and install run only if the preceding step succeeded.
    ./configure --enable-R-shlib --without-x --with-cairo && make && sudo make install
    # Link the Rscript and R binaries from /usr/local/bin into /usr/bin.
    sudo ln -s /usr/local/bin/Rscript /usr/bin/
    sudo ln -s /usr/local/bin/R /usr/bin/
    cd ..
    ```

    # Java install (all nodes)
    ```sh
    # Register the JDK 1.7.0_45 tools with alternatives, each at priority 2000.
    JDK_BIN=/usr/java/jdk1.7.0_45-cloudera/bin
    for tool in java javac jar javah; do
        alternatives --install "/usr/bin/${tool}" "${tool}" "${JDK_BIN}/${tool}" 2000
    done
    # To verify the active version afterwards:
    # java -version
    # javac -version
    ```


    # Java build environment for RImpala / RHadoop (all nodes)
    ```sh
    # Java locations and compiler/linker flags exported for the steps that follow.
    # JAVA_HOME has no trailing slash, so the derived paths do not contain "//".
    export JAVA_HOME=/usr/java/jdk1.7.0_45-cloudera
    export JAVA_CPPFLAGS="-I$JAVA_HOME/include -I$JAVA_HOME/include/linux"
    export JAVA_LD_LIBRARY_PATH="$JAVA_HOME/jre/lib/amd64/server:$JAVA_HOME/jre/lib/amd64:$JAVA_HOME/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib"
    # LD_LIBRARY_PATH is set to exactly the same search path as JAVA_LD_LIBRARY_PATH.
    export LD_LIBRARY_PATH="$JAVA_LD_LIBRARY_PATH"
    # The -L entries mirror JAVA_LD_LIBRARY_PATH one for one. The third entry is
    # $JAVA_HOME/jre/../lib/amd64 (inside the JDK), not $JAVA_HOME/../lib/amd64.
    export JAVA_LIBS="-L$JAVA_HOME/jre/lib/amd64/server -L$JAVA_HOME/jre/lib/amd64 -L$JAVA_HOME/jre/../lib/amd64 -L/usr/java/packages/lib/amd64 -L/usr/lib64 -L/lib64 -L/lib -L/usr/lib -ljvm"
    ```

    # configure java for R (all nodes)
    ```sh
    # Have R reconfigure its Java settings (run in the shell where the JAVA_* variables were exported).
    R CMD javareconf
    ```

    # rJava install & verify (all nodes)
    ```r
    # Inside R
    # Install rJava from CRAN and load it.
    install.packages(c("rJava"), repos="http://cran.us.r-project.org/")
    library(rJava)
    # Verification: initialise rJava, then call java.lang.System.getProperty("java.version")
    # and return the result as a string.
    .jinit()
    .jcall("java/lang/System","S","getProperty","java.version")
    ```

    # Rcpp (all nodes)
    ```sh
    # Fetch the pinned Rcpp 0.9.15 source tarball from the CRAN archive and install it.
    wget http://cran.us.r-project.org/src/contrib/Archive/Rcpp/Rcpp_0.9.15.tar.gz
    R CMD INSTALL Rcpp_0.9.15.tar.gz
    ```

    # plyr v1.8 (all nodes)
    ```sh
    # Fetch the pinned plyr 1.8 source tarball from the archive and install it.
    wget http://cran.revolutionanalytics.com/src/contrib/Archive/plyr/plyr_1.8.tar.gz
    R CMD INSTALL plyr_1.8.tar.gz
    ```

    # reshape2 v1.2.2 (all nodes)
    ```sh
    # Fetch the pinned reshape2 1.2.2 source tarball from the archive and install it.
    wget http://cran.revolutionanalytics.com/src/contrib/Archive/reshape2/reshape2_1.2.2.tar.gz
    R CMD INSTALL reshape2_1.2.2.tar.gz
    ```

    # Install other R packages (all nodes)
    ```r
    # Inside R
    # Remaining R package dependencies. "functional" is installed once, in the
    # byte-compiled call below, instead of being listed in two separate calls.
    install.packages(c("RJSONIO", "bitops", "digest", "RImpala"), repos="http://cran.us.r-project.org/")
    # These packages are installed with --byte-compile.
    install.packages(c('itertools'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
    install.packages(c('functional', 'stringr'), repos="http://cran.revolutionanalytics.com", INSTALL_opts=c('--byte-compile') )
    install.packages(c('randomForest'), repos="http://cran.revolutionanalytics.com" )
    install.packages(c("caTools"), repos="http://cran.us.r-project.org/")
    ```

    # install git
    ```sh
    # git is used below to clone the rmr2 and rhdfs repositories.
    yum install -y git
    ```

    # install rmr2 and rhdfs (all nodes)
    ```sh
    # Hadoop launcher and streaming jar locations for this shell session.
    export HADOOP_CMD=/usr/bin/hadoop
    export HADOOP_STREAMING=/opt/cloudera/parcels/CDH/lib/hadoop-mapreduce/hadoop-streaming.jar

    # Clone over HTTPS: GitHub no longer serves the unauthenticated git:// protocol.
    git clone https://github.com/RevolutionAnalytics/rmr2.git
    sudo R CMD INSTALL --byte-compile rmr2/pkg/

    git clone https://github.com/RevolutionAnalytics/rhdfs.git
    # HADOOP_CMD is passed explicitly on the sudo command line for the rhdfs install.
    sudo HADOOP_CMD=/usr/bin/hadoop R CMD INSTALL --byte-compile rhdfs/pkg/
    ```

    # RSTUDIO! (only edge)
    ```sh
    # Download the RStudio Server 0.98.490 RPM.
    wget http://download2.rstudio.org/rstudio-server-0.98.490-x86_64.rpm
    # Install the libssl.so.6 provider and shared-mime-info via yum first,
    # then install the RPM itself with rpm's dependency check disabled (--nodeps).
    yum install -y 'libssl.so.6()(64bit)' shared-mime-info
    rpm -i --nodeps rstudio-server-0.98.490-x86_64.rpm
    ```

    ## Use Unix (PAM) logins for RStudio
    ```sh
    # Stop the server while its configuration is changed.
    sudo /usr/sbin/rstudio-server stop
    # Reuse the system "login" PAM configuration for the "rstudio" PAM service.
    cp /etc/pam.d/login /etc/pam.d/rstudio
    # rsession-which-r is an rserver.conf option; RStudio Server does not read
    # /etc/rstudio/rstudio.conf. Append (-a) so existing server settings are kept.
    echo rsession-which-r=/usr/local/bin/R | sudo tee -a /etc/rstudio/rserver.conf
    sudo /usr/sbin/rstudio-server start
    ```

    # Browser
    Go to `http://edge-host:8787` in a browser, replacing `edge-host` with the edge node's hostname.