@erikbern
Last active June 26, 2023 00:40
Installing TensorFlow on EC2
# This is mostly based on this excellent blog post:
# http://tleyden.github.io/blog/2014/10/25/cuda-6-dot-5-on-aws-gpu-instance-running-ubuntu-14-dot-04/
# Install various packages
sudo apt-get update
sudo apt-get upgrade -y # choose "install the package maintainer's version"
sudo apt-get install -y build-essential python-pip python-dev git python-numpy swig python-dev default-jdk zip zlib1g-dev
# Blacklist Noveau which has some kind of conflict with the nvidia driver
echo -e "blacklist nouveau\nblacklist lbm-nouveau\noptions nouveau modeset=0\nalias nouveau off\nalias lbm-nouveau off\n" | sudo tee /etc/modprobe.d/blacklist-nouveau.conf
echo options nouveau modeset=0 | sudo tee -a /etc/modprobe.d/nouveau-kms.conf
sudo update-initramfs -u
sudo reboot # Reboot (annoying you have to do this in 2015!)
# Some other annoying thing we have to do
sudo apt-get install -y linux-image-extra-virtual
sudo reboot # Not sure why this is needed
# Install latest Linux headers
sudo apt-get install -y linux-source linux-headers-`uname -r`
# Install CUDA 7.0
wget http://developer.download.nvidia.com/compute/cuda/7_0/Prod/local_installers/cuda_7.0.28_linux.run
chmod +x cuda_7.0.28_linux.run
./cuda_7.0.28_linux.run -extract=`pwd`/nvidia_installers
cd nvidia_installers
sudo ./NVIDIA-Linux-x86_64-346.46.run
sudo modprobe nvidia
sudo ./cuda-linux64-rel-7.0.28-19326674.run
cd
# Install TensorFlow from the prebuilt GPU wheel
sudo pip install https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow-0.5.0-cp27-none-linux_x86_64.whl
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64"
export CUDA_HOME=/usr/local/cuda
# Install Bazel
git clone https://github.com/bazelbuild/bazel.git
cd bazel
git checkout tags/0.1.0
./compile.sh
sudo cp output/bazel /usr/bin
cd
# Clone TensorFlow and run the MNIST example
git clone https://github.com/tensorflow/tensorflow
cd tensorflow/tensorflow/models/image/mnist
python convolutional.py
# At this point it breaks down: the example runs, but doesn't use the GPU. On g2.2xlarge:
# I tensorflow/core/common_runtime/gpu/gpu_device.cc:611] Ignoring gpu device (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) with Cuda compute capability 3.0. The minimum required Cuda capability is 3.5.
# On g2.8xlarge:
# I tensorflow/core/common_runtime/gpu/gpu_device.cc:611] Ignoring gpu device (device: 0, name: GRID K520, pci bus id: 0000:00:03.0) with Cuda compute capability 3.0. The minimum required Cuda capability is 3.5.
# I tensorflow/core/common_runtime/gpu/gpu_device.cc:611] Ignoring gpu device (device: 1, name: GRID K520, pci bus id: 0000:00:04.0) with Cuda compute capability 3.0. The minimum required Cuda capability is 3.5.
# I tensorflow/core/common_runtime/gpu/gpu_device.cc:611] Ignoring gpu device (device: 2, name: GRID K520, pci bus id: 0000:00:05.0) with Cuda compute capability 3.0. The minimum required Cuda capability is 3.5.
# I tensorflow/core/common_runtime/gpu/gpu_device.cc:611] Ignoring gpu device (device: 3, name: GRID K520, pci bus id: 0000:00:06.0) with Cuda compute capability 3.0. The minimum required Cuda capability is 3.5.
# TODO: Seems like there's a discussion on GitHub about CUDA 3.0 support
# https://github.com/tensorflow/tensorflow/issues/25
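As the issue above (and several comments below) suggest, the compute-capability check can be relaxed by rebuilding TensorFlow from source with 3.0 allowed. A sketch only, assuming a ~/tensorflow checkout; TF_UNOFFICIAL_SETTING was the switch that let the early configure script accept non-default capabilities, and the wheel filename depends on your checkout:

```shell
# Sketch: rebuild TensorFlow so it accepts the K520's Cuda compute capability 3.0
cd ~/tensorflow
TF_UNOFFICIAL_SETTING=1 ./configure   # answer "3.0" at the compute-capability prompt
bazel build -c opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
sudo pip install /tmp/tensorflow_pkg/tensorflow-0.5.0-cp27-none-linux_x86_64.whl
```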
@hammer commented Dec 16, 2015

Do you need python-dev twice on line 12?

@babakbehzad

Yes, @hammer is right: python-dev is needed, or building the new Python package will fail with a "could not find <Python.h>" error.

@marcotrombetti

Hi,

Thanks for the great work.

I am trying to compare CPU/GPU across different hardware, and I am getting this:

MacBook Pro i5 2.6 GHz (cifar10_train.py)
2015-12-24 23:33:13.533470: step 50, loss = 4.58 (173.1 examples/sec; 0.739 sec/batch)

AWS g2.2xlarge GPU $2/hour - cifar10_multi_gpu_train.py
(Creating TensorFlow device (/gpu:0) -> (device: 0, name: GRID K520...)
2015-12-24 22:15:16.011190: step 50, loss = 4.59 (322.1 examples/sec; 0.397 sec/batch)

AWS g2.2xlarge NO GPU $2/hour - cifar10_multi_gpu_train.py
(Ignoring gpu device (device: 0, name: GRID K520... )
2015-12-24 22:00:05.110064: step 50, loss = 4.59 (254.6 examples/sec; 0.503 sec/batch)

Is it normal that my Mac is less than 2x slower than a g2.2xlarge that uses a GPU? I was expecting 10x...

@springcoil

These were very useful, Erik. I finally got around to using TensorFlow today.

@taion commented Dec 29, 2015

I'm seeing the same performance numbers as @marcotrombetti on g2.2xlarge instances, both on GPU and on CPU. This seems to be many times slower than Theano on the same hardware when running on GPU. Is this expected, or is this indicative of some misconfiguration on my side?

@closedLoop

To correct line 88 above: it CAN use all four GPUs.

Performance Attributes

All measured at step = 50

Instance Type   Num GPUs   Examples/Sec   Sec/Batch
g2.2xlarge      1          216.0          0.593
g2.2xlarge      1          225.2          0.568
g2.8xlarge      4          675.2          0.190

Notes

To run with all 4 GPUs, call the example with --num_gpus=4

Very important you set Compute Capability = 3.0 (thanks @wfbradley)

If you pulled the latest TensorFlow version 0.6, you need to change line 80 to:
sudo pip install /tmp/tensorflow_pkg/tensorflow-0.6.0-cp27-none-linux_x86_64.whl

However, for me this led to segfaults in the example due to an issue with the Eigen kernel. This has since been resolved. Please see:
tensorflow/tensorflow#726
tensorflow/tensorflow#713
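Putting the notes above together, the multi-GPU invocation looks roughly like this; a sketch only, with the models path assumed from the TensorFlow 0.6 source tree:

```shell
# Sketch: train CIFAR-10 across all four GRID K520s on a g2.8xlarge
cd ~/tensorflow/tensorflow/models/image/cifar10
python cifar10_multi_gpu_train.py --num_gpus=4
```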

@alexlenail

Why does Erik install tensorflow the way he does? Why not use pip?

@shackenberg

@zfrenchee You need to recompile TensorFlow from source with the special configuration from line 72, otherwise it will not run on an EC2 GPU. See the comments from line 67 on.

@shackenberg

Thanks @closedLoop for all the information; I get the same numbers. Still strange that @erikbern and @marcotrombetti report much higher speeds (32x examples/sec instead of 22x examples/sec).

@raindeer

To build for Python 3.4:

  • During configure specify /usr/bin/python3
  • Use bazel 0.1.1, git checkout tags/0.1.1
  • Use wheel 0.26
  • Don't use the current tensorflow master. It is not Python 3 compatible. Checkout the 0.6.0 tag.
  • Modify tensorflow/bazel-bin/tensorflow/tools/pip_package/build_pip_package. Add --python-tag py34 when building the wheel:
...
echo $(date) : "=== Building wheel"
python setup.py bdist_wheel --python-tag py34 >/dev/null
...
  • Make sure to use the correct wheel name when installing: sudo pip3 install /tmp/tensorflow_pkg/tensorflow-0.6.0-py34-none-any.whl
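The build_pip_package edit in the bullet above can be scripted with sed. Below is a sketch run against a stand-in copy of the file; on the instance you would point the sed line at tensorflow/bazel-bin/tensorflow/tools/pip_package/build_pip_package instead:

```shell
# Create a stand-in for the relevant lines of build_pip_package
printf '%s\n' \
  'echo $(date) : "=== Building wheel"' \
  'python setup.py bdist_wheel >/dev/null' > build_pip_package
# Add the py34 python-tag to the wheel build, as described above
sed -i 's/bdist_wheel/bdist_wheel --python-tag py34/' build_pip_package
cat build_pip_package
```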

@Nodice commented Jan 14, 2016

Just for reference: GeForce GTX 970, i7, local machine: 903.3 examples/sec; 0.142 sec/batch.

@ggonzale

Just got it to run. Thank you for the code. My timing, measured at step 50:

Instance Type                    s/batch
g2.2xlarge                       0.26
g2.8xlarge (--num_gpus=4)        0.11
MacBook Pro i7 quad, late 2013   0.56

GPU usage is quite unimpressive, as @marcotrombetti says. I was also expecting an order of magnitude improvement.

@Razorwind

I have done all the steps, but the last pip install seems to be an issue:
"Requirement '/tmp/tensorflow_pkg/tensorflow-0.5.0-cp27-none-linux_x86_64.whl' looks like a filename, but the file does not exist"

I used git checkout tags/0.1.4 for Bazel instead.

Edit: OK, silly me. Line 80 needs to be changed to 0.6.0 instead.

@Andyccs commented Jan 31, 2016

@cancan101

Using the Keras mnist_cnn script to compare the performance of Theano to TensorFlow on g2.2xlarge instances, I see 8 s/epoch with Theano and 97 s/epoch with TensorFlow!

@chrisconley

In case anyone's interested, we documented how we installed TensorFlow along with Python 3.4 and Jupyter on EC2 based on this gist and many of the comments here. Thank you everyone!

@erikbern (Author) commented Feb 4, 2016

Didn't notice all the comments here; GitHub doesn't send notifications on gists, I guess. Anyway, you should check out @chrisconley's link instead!

@AlexJoz commented Feb 20, 2016

Nice work! Thanks for the guide ^^
I made another one with Python 3, TensorFlow 0.7 and OpenCV 3.1:
https://gist.github.com/AlexJoz/1670baf0b32573ca7923
Public ami with my setup in N.Virginia: ami-9d0f3ff7

@axeltidemann

If you for some reason find / filling up unexplainably when compiling with Bazel, this is because Bazel puts its cache files in ~/.cache/bazel by default. Set export TEST_TMPDIR=/tmp/.cache to avoid this.

@ccywch commented Apr 10, 2016

There seems to be a new solution: https://aws.amazon.com/marketplace/pp/B01AOE205O

@deeprnd commented May 24, 2016

@AlexJoz's AMI works great

@SpencerC

Published a new AMI in N. Virginia with 0.8.0 support: ami-1e19ee73

@hpssjellis

Thanks. I will look into changing my bash file that installs the CPU version of TensorFlow (with video) to a GPU version on Cloud9 http://c9.io

https://github.com/hpssjellis/forth-tensorflow

@shamak commented Jul 9, 2016

When I run line 73, I get an error: Unrecognized option: --host_force_python=py2

Any idea why?

@zaheersm commented Jul 11, 2016

@shamak I'm getting the exact same error.

@zaheersm commented Jul 11, 2016

@shamak Install the latest version of Bazel from here

Later, you might also need to set up cuDNN v4 (for CUDA 7.0) instead of v2 (for CUDA 6.5). See this issue

@rnditdev

It seems that cuDNN can be downloaded with

curl -fvSL http://developer.download.nvidia.com/compute/redist/cudnn/v2/cudnn-6.5-linux-x64-v2.tgz -o cudnn-6.5-linux-x64-v2.tgz
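After downloading, the usual install is to unpack the tarball and copy the header and libraries into the CUDA tree. A sketch, with the extracted directory name assumed from the v2 tarball:

```shell
# Sketch: install cuDNN v2 into /usr/local/cuda
tar -xzf cudnn-6.5-linux-x64-v2.tgz
sudo cp cudnn-6.5-linux-x64-v2/cudnn.h /usr/local/cuda/include/
sudo cp cudnn-6.5-linux-x64-v2/libcudnn* /usr/local/cuda/lib64/
```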

@standy66 commented Aug 15, 2016

Just for reference:

GTX 1070, i7 6700K, local machine, TensorFlow inside a Docker container using nvidia-docker (but I doubt it adds any overhead)

1744.3 examples/sec; 0.073 sec/batch

@alexstaravoitau

I've recently prepared a couple of convenience scripts for firing up your AWS instance with Jupyter Notebook on board that you may find useful:

@pvels commented Sep 4, 2016

For the stats:

Zotac GTX 1080 AMP Extreme, 2560 CUDA cores, 1771 MHz core clock, 10000 MHz mem clock. i7 930 3.8 GHz boost clock.

step 100000, loss = 0.72 (1780.0 examples/sec; 0.072 sec/batch); time: 2h 5m.
