Last active
August 31, 2015 13:33
-
-
Save bartekdobija/4ca798a5407f57ab26a3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Build a Spark distribution that excludes Hadoop's jars ("hadoop-provided"),
# for deployment against an externally supplied Hadoop installation.
# Run from the root of a Spark source checkout.
#
# In this case I have a Hadoop distro compiled from source:
#   MAVEN_OPTS="-Xms512m -Xmx1024m" mvn package -Pdist,native -DskipTests -Dtar
# verified with:
#   hadoop checknative -a
# with output:
#   Native library checking:
#   hadoop:  true /usr/local/hadoop-2.6.0/lib/native/libhadoop.so.1.0.0
#   zlib:    true /lib64/libz.so.1
#   snappy:  true /usr/lib64/libsnappy.so.1
#   lz4:     true revision:99
#   bzip2:   true /lib64/libbz2.so.1
#   openssl: true /usr/lib64/libcrypto.so
#
# Spark without hadoop dependencies.
# Don't forget to install snappy & snappy-devel on RHEL/CentOS etc.
# Spark dependencies should be configured as per this document:
#   https://spark.apache.org/docs/latest/hadoop-provided.html
#
####### spark-defaults.conf: #######
#spark.yarn.jar hdfs:///user/spark/share/lib/spark-assembly-1.5.0-SNAPSHOT-hadoop2.6.0.jar
#spark.ui.enabled false
##spark.shuffle.spill false
##spark.shuffle.spill.compress true
##spark.shuffle.consolidateFiles true
##spark.shuffle.service.enabled true
## Execution Behavior
#spark.broadcast.blockSize 4096
## Dynamic Resource Allocation (YARN)
##spark.dynamicAllocation.enabled true
##spark.dynamicAllocation.executorIdleTimeout 10800
##spark.dynamicAllocation.initialExecutors 3
##spark.speculation true
#spark.scheduler.mode FAIR
#spark.executor.memory 5G
#spark.kryoserializer.buffer.max 1000m
#spark.driver.maxResultSize 0
#spark.serializer org.apache.spark.serializer.KryoSerializer
#spark.yarn.preserve.staging.files false
#spark.master yarn
#spark.rdd.compress true
## Local execution of selected Spark functions
#spark.localExecution.enabled true
#spark.sql.parquet.binaryAsString true
#spark.sql.parquet.compression.codec snappy
## use lz4 compression for broadcast variables as Snappy is not supported on MacOSX
#spark.broadcast.compress true
#spark.io.compression.codec lz4
#spark.driver.extraLibraryPath /usr/local/hadoop/lib/native/
#spark.executor.extraLibraryPath /opt/cloudera/parcels/CDH/lib/hadoop/lib/native
#spark.executor.extraClassPath /usr/local/lib/jdbc/sqlserver/*.jar:/usr/local/lib/jdbc/mysql/*.jar:/usr/local/anaconda/bin
#
####### spark-env.sh #######
# HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop/
# SPARK_DIST_CLASSPATH=$(hadoop classpath)
# LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/hadoop/lib/native/

# Fail fast on errors, unset variables, and pipeline failures.
set -euo pipefail

# Guard against running from the wrong directory: make-distribution.sh lives
# in the Spark source root (dev/ in newer Spark releases — adjust if needed).
if [[ ! -x ./make-distribution.sh ]]; then
  printf 'error: ./make-distribution.sh not found or not executable; run from the Spark source root\n' >&2
  exit 1
fi

./make-distribution.sh \
  --name without-hadoop \
  --tgz \
  -Phadoop-2.6 \
  -Psparkr \
  -Phadoop-provided \
  -Phive \
  -Phive-thriftserver \
  -Pyarn \
  -DzincPort=3038 \
  -DskipTests \
  -Dmaven.javadoc.skip=true
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment