Skip to content

Instantly share code, notes, and snippets.

@ngi644
Last active January 31, 2017 07:56
Show Gist options
  • Select an option

  • Save ngi644/395c5453fb90c1a46bf5cf9fb253cdbf to your computer and use it in GitHub Desktop.

Select an option

Save ngi644/395c5453fb90c1a46bf5cf9fb253cdbf to your computer and use it in GitHub Desktop.

Revisions

  1. ngi644 revised this gist Jan 31, 2017. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions custom_install_insight.sh
    Original file line number Diff line number Diff line change
    @@ -32,13 +32,13 @@ echo 'create an "ansible" virtualenv and activate it'
    virtualenv ansible
    . ansible/bin/activate
    git clone https://github.com/ngi644/configuration.git

    cd configuration/
    pip install -r requirements.txt
    cd playbooks/edx-east/
    echo "running ansible -- it's going to take a while"
    ansible-playbook -i localhost, -c local analytics_single.yml --extra-vars "INSIGHTS_LMS_BASE=$LMS_HOSTNAME INSIGHTS_BASE_URL=$INSIGHTS_HOSTNAME"
    ansible-playbook -i localhost, -c local analytics_single.yml --extra-vars "INSIGHTS_LMS_BASE=$LMS_HOSTNAME INSIGHTS_BASE_URL=$INSIGHTS_HOSTNAME" --ask-pass --ask-sudo-pass

    echo "-- Set up pipeline"
    cd $HOME
    sudo mkdir -p /edx/var/log/tracking
    @@ -59,6 +59,7 @@ git clone https://github.com/edx/edx-analytics-pipeline
    cd edx-analytics-pipeline
    make bootstrap
    # HACK: make ansible do this
    sudo mkdir -p /edx/etc/edx-analytics-pipeline
    cat <<EOF > /edx/etc/edx-analytics-pipeline/input.json
    {"username": $DB_USERNAME, "host": $DB_HOST, "password": $DB_PASSWORD, "port": $DB_PORT}
    EOF
  2. ngi644 revised this gist Jan 31, 2017. 1 changed file with 14 additions and 5 deletions.
    19 changes: 14 additions & 5 deletions custom_install_insight.sh
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,18 @@
    #!/bin/bash

    LMS_HOSTNAME="https://mulby.sandbox.edx.org"
    # step 0
    echo "Install needed packages"
    sudo apt-get update -y
    sudo apt-get upgrade -y
    sudo apt-get install -y build-essential software-properties-common python-software-properties curl git libxml2-dev libxslt1-dev libfreetype6-dev python-pip python-apt python-dev libxmlsec1-dev swig libmysqlclient-dev
    sudo apt-get install -y python-dev --upgrade
    sudo pip install --upgrade pip
    sudo pip install setuptools --upgrade
    sudo -H pip install --upgrade virtualenv


    # LMS_HOSTNAME="https://mulby.sandbox.edx.org"
    LMS_HOSTNAME="http://192.168.10.15"
    INSIGHTS_HOSTNAME="http://0.0.0.0:8110" # Change this to the externally visible domain and scheme for your Insights install, ideally HTTPS
    DB_USERNAME="read_only"
    DB_HOST="localhost"
    @@ -15,10 +27,7 @@ ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
    echo >> ~/.ssh/authorized_keys # Make sure there's a newline at the end
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    # check: ssh localhost "echo It worked!" -- make sure it works.
    echo "Install needed packages"
    sudo apt-get update
    sudo apt-get install -y git python-pip python-dev libmysqlclient-dev
    sudo pip install virtualenv

    echo 'create an "ansible" virtualenv and activate it'
    virtualenv ansible
    . ansible/bin/activate
  3. ngi644 created this gist Jan 31, 2017.
    61 changes: 61 additions & 0 deletions custom_install_insight.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,61 @@
    #!/bin/bash
    #
    # Run this script to set up the analytics pipeline
    #
    # What it does, in order:
    #   1. makes sure the current user has an ssh key that is authorized locally
    #   2. installs system packages and virtualenv
    #   3. runs the analytics_single.yml ansible playbook from a fresh virtualenv
    #   4. copies ~/tracking.log into /edx/var/log/tracking
    #   5. checks out edx-analytics-pipeline, writes its DB credentials file and
    #      launches a sample task through remote-task
    #
    # Requirements: sudo rights and a tracking.log file in $HOME.

    # Stop at the first failing step so the final "without error" message is
    # only printed when every step really succeeded.
    set -eo pipefail

    LMS_HOSTNAME="https://mulby.sandbox.edx.org"
    INSIGHTS_HOSTNAME="http://0.0.0.0:8110" # Change this to the externally visible domain and scheme for your Insights install, ideally HTTPS
    DB_USERNAME="read_only"
    DB_HOST="localhost"
    DB_PASSWORD="password"
    DB_PORT="3306"

    # Location of the credentials file written for the pipeline further down.
    PIPELINE_ETC_DIR="/edx/etc/edx-analytics-pipeline"

    echo "Assumes that there's a tracking.log file in \$HOME"
    sleep 2

    echo "Create ssh key"
    # Only generate a key when none exists: ssh-keygen would otherwise stop and
    # ask whether to overwrite the existing one.
    if [ ! -f "$HOME/.ssh/id_rsa" ]; then
      ssh-keygen -t rsa -f "$HOME/.ssh/id_rsa" -P ''
    fi
    touch "$HOME/.ssh/authorized_keys"
    # Append the public key only if it is not already listed, so re-running the
    # script does not pile up duplicate entries.
    if ! grep -qxFf "$HOME/.ssh/id_rsa.pub" "$HOME/.ssh/authorized_keys"; then
      echo >> "$HOME/.ssh/authorized_keys" # Make sure there's a newline at the end
      cat "$HOME/.ssh/id_rsa.pub" >> "$HOME/.ssh/authorized_keys"
    fi
    # check: ssh localhost "echo It worked!" -- make sure it works.

    echo "Install needed packages"
    sudo apt-get update
    sudo apt-get install -y git python-pip python-dev libmysqlclient-dev
    sudo pip install virtualenv

    echo 'create an "ansible" virtualenv and activate it'
    virtualenv ansible
    . ansible/bin/activate
    git clone https://github.com/ngi644/configuration.git

    cd configuration/
    pip install -r requirements.txt
    cd playbooks/edx-east/
    echo "running ansible -- it's going to take a while"
    ansible-playbook -i localhost, -c local analytics_single.yml --extra-vars "INSIGHTS_LMS_BASE=$LMS_HOSTNAME INSIGHTS_BASE_URL=$INSIGHTS_HOSTNAME"

    echo "-- Set up pipeline"
    cd "$HOME"
    sudo mkdir -p /edx/var/log/tracking
    sudo cp "$HOME/tracking.log" /edx/var/log/tracking
    sudo chown hadoop /edx/var/log/tracking/tracking.log

    echo "Waiting 70 seconds to make sure the logs get loaded into HDFS"
    # Hack hackity hack hack -- cron runs every minute and loads data from /edx/var/log/tracking
    sleep 70

    # Make a new virtualenv -- otherwise will have conflicts
    echo "Make pipeline virtualenv"
    virtualenv pipeline
    . pipeline/bin/activate

    echo "Check out pipeline"
    git clone https://github.com/edx/edx-analytics-pipeline
    cd edx-analytics-pipeline
    make bootstrap

    # HACK: make ansible do this
    # The target directory may not exist yet and lives in a tree this script
    # only touches with sudo, so create it and write the file as root.
    sudo mkdir -p "$PIPELINE_ETC_DIR"
    # String values must be quoted to produce valid JSON; the port stays a bare
    # number. NOTE: values are not JSON-escaped, so a password containing '"'
    # or '\' needs escaping by hand.
    printf '{"username": "%s", "host": "%s", "password": "%s", "port": %s}\n' \
      "$DB_USERNAME" "$DB_HOST" "$DB_PASSWORD" "$DB_PORT" \
      | sudo tee "$PIPELINE_ETC_DIR/input.json" > /dev/null

    echo "Run the pipeline"
    # Ensure you're in the pipeline virtualenv
    remote-task --host localhost --repo https://github.com/edx/edx-analytics-pipeline --user ubuntu --override-config "$HOME/edx-analytics-pipeline/config/devstack.cfg" --wheel-url http://edx-wheelhouse.s3-website-us-east-1.amazonaws.com/Ubuntu/precise --remote-name analyticstack --wait TotalEventsDailyTask --interval 2016 --output-root hdfs://localhost:9000/output/ --local-scheduler

    echo "If you got this far without error, you should try running the real pipeline tasks listed/linked below"