Last active
January 31, 2017 07:56
-
-
Save ngi644/395c5453fb90c1a46bf5cf9fb253cdbf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Provision a single-machine edX analytics stack and run a smoke-test pipeline
# task against it:
#   step 0  install system packages and Python tooling
#   step 1  create a passwordless ssh key and authorize it for localhost
#   step 2  run the analytics_single.yml ansible playbook
#   step 3  copy $HOME/tracking.log to where the HDFS loader cron picks it up
#   step 4  check out edx-analytics-pipeline and run TotalEventsDailyTask
#
# Requirements: a sudo-capable user and a tracking.log file in $HOME.
#
# Optional environment overrides (defaults shown in the assignments below):
#   LMS_HOSTNAME, INSIGHTS_HOSTNAME, DB_USERNAME, DB_HOST, DB_PASSWORD, DB_PORT

# Abort on the first failing step instead of carrying on with a half-built
# environment. NOTE(review): `set -u` is intentionally left off because the
# virtualenv `activate` scripts sourced below may read unset variables --
# confirm before enabling it.
set -eo pipefail

# Print a message to stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# step 0
echo "Install needed packages"
sudo apt-get update -y
sudo apt-get upgrade -y
sudo apt-get install -y \
  build-essential software-properties-common python-software-properties \
  curl git libxml2-dev libxslt1-dev libfreetype6-dev python-pip python-apt \
  python-dev libxmlsec1-dev swig libmysqlclient-dev
sudo apt-get install -y python-dev --upgrade
sudo pip install --upgrade pip
sudo pip install setuptools --upgrade
sudo -H pip install --upgrade virtualenv

# Example of a hosted LMS: LMS_HOSTNAME="https://mulby.sandbox.edx.org"
LMS_HOSTNAME="${LMS_HOSTNAME:-http://192.168.10.15}"
# Change this to the externally visible domain and scheme for your Insights
# install, ideally HTTPS.
INSIGHTS_HOSTNAME="${INSIGHTS_HOSTNAME:-http://0.0.0.0:8110}"
DB_USERNAME="${DB_USERNAME:-read_only}"
DB_HOST="${DB_HOST:-localhost}"
DB_PASSWORD="${DB_PASSWORD:-password}"
DB_PORT="${DB_PORT:-3306}"

# Run this script to set up the analytics pipeline
echo "Assumes that there's a tracking.log file in \$HOME"
# Fail now rather than after the long ansible run if the input is missing.
[[ -f "$HOME/tracking.log" ]] || die "tracking.log not found in $HOME"
sleep 2

echo "Create ssh key"
# Only generate a key when none exists; ssh-keygen would otherwise stop and
# ask whether to overwrite the existing one.
if [[ ! -f "$HOME/.ssh/id_rsa" ]]; then
  ssh-keygen -t rsa -f "$HOME/.ssh/id_rsa" -P ''
fi
# Authorize the key once; re-running the script must not pile up duplicates.
if ! grep -qxF -- "$(cat "$HOME/.ssh/id_rsa.pub")" \
    "$HOME/.ssh/authorized_keys" 2>/dev/null; then
  echo >> "$HOME/.ssh/authorized_keys" # Make sure there's a newline at the end
  cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"
fi
# check: ssh localhost "echo It worked!" -- make sure it works.

echo 'create an "ansible" virtualenv and activate it'
virtualenv ansible
. ansible/bin/activate
# Reuse an existing checkout so the script can be re-run after a failure.
[[ -d configuration ]] || git clone https://github.com/ngi644/configuration.git
cd configuration/
pip install -r requirements.txt
cd playbooks/edx-east/
echo "running ansible -- it's going to take a while"
# NOTE(review): newer Ansible releases spell --ask-sudo-pass as
# --ask-become-pass; confirm against the version pinned in requirements.txt.
ansible-playbook -i localhost, -c local analytics_single.yml \
  --extra-vars "INSIGHTS_LMS_BASE=$LMS_HOSTNAME INSIGHTS_BASE_URL=$INSIGHTS_HOSTNAME" \
  --ask-pass --ask-sudo-pass

echo "-- Set up pipeline"
cd "$HOME"
sudo mkdir -p /edx/var/log/tracking
sudo cp -- "$HOME/tracking.log" /edx/var/log/tracking
sudo chown hadoop /edx/var/log/tracking/tracking.log
echo "Waiting 70 seconds to make sure the logs get loaded into HDFS"
# Hack hackity hack hack -- cron runs every minute and loads data from /edx/var/log/tracking
sleep 70

# Make a new virtualenv -- otherwise will have conflicts
echo "Make pipeline virtualenv"
virtualenv pipeline
. pipeline/bin/activate
echo "Check out pipeline"
[[ -d edx-analytics-pipeline ]] || git clone https://github.com/edx/edx-analytics-pipeline
cd edx-analytics-pipeline
make bootstrap

# HACK: make ansible do this
sudo mkdir -p /edx/etc/edx-analytics-pipeline
# The directory is created with sudo, so the file has to be written with
# elevated rights too; a plain `>` redirect runs as the invoking user.
# String values are quoted so the result is valid JSON; the port stays numeric.
# Values containing double quotes or backslashes are not escaped.
sudo tee /edx/etc/edx-analytics-pipeline/input.json > /dev/null <<EOF
{"username": "$DB_USERNAME", "host": "$DB_HOST", "password": "$DB_PASSWORD", "port": $DB_PORT}
EOF

echo "Run the pipeline"
# Ensure you're in the pipeline virtualenv
remote-task \
  --host localhost \
  --repo https://github.com/edx/edx-analytics-pipeline \
  --user ubuntu \
  --override-config "$HOME/edx-analytics-pipeline/config/devstack.cfg" \
  --wheel-url http://edx-wheelhouse.s3-website-us-east-1.amazonaws.com/Ubuntu/precise \
  --remote-name analyticstack \
  --wait \
  TotalEventsDailyTask \
  --interval 2016 \
  --output-root hdfs://localhost:9000/output/ \
  --local-scheduler
echo "If you got this far without error, you should try running the real pipeline tasks listed/linked below"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment