#!/bin/bash #################################### # BASIC REQUIREMENTS # http://graphite.wikidot.com/installation # Forked from: http://geek.michaelgrace.org/2011/09/how-to-install-graphite-on-ubuntu/ # Ubuntu 11.10 Oneiric Ocelot # Forked from https://gist.github.com/1287170 # Modified to use NGinx + uwsgi instead of Apache, as well as memcached and supervisord, incorporating ideas from # http://blog.adku.com/2011/10/scalable-realtime-stats-with-graphite.html # Uses python based statsite / twisted instead of statsd / node.js # Added NRPE listener for Nagios / OpsView support # Fixed some bugs in supervisord configuration from post above # Designed to fire up an entirely new instance on EC2, mounting a separate EBS volume for storage -- modify values below as appropriate # This script assumes you have the EC2 tools properly configured in your environment # and that you have a local file with the SSH key to be used on the new machine # Last tested & updated 11/12/2011 #################################### ##################################################################### # Customize these variables for your environment # ##################################################################### ami_id='ami-a7f539ce' #32-bit ubuntu 11.10 security_groups='-g sg-cbf8e7a2 -g sg-c7f6e9ae' #graphite-statsd-linux AND linux-ssh instance_user='ubuntu' #other images use root zone_id='us-east-1d' instance_type='c1.medium' volume_size_gb=40 volume_mount_point='/data' name='Graphite Performance Monitoring (Ubuntu Oneiric)' key_path=~/mykey.pem server_name='foo.bar.com' cores=2 memcached_mb=256 # Note that if the destinations are all carbon-caches then this should # exactly match the webapp's CARBONLINK_HOSTS setting in terms of # instances listed (order matters!). carbon_relay_destinations='127.0.0.1:2004' ##servers as comma separated list of servers => carbon01, carbon02, carbon03 carbon_cache_servers='127.0.0.1' ## list of servers MUST INCLUDE TRAILING comma - don't ever use loopback => 'carbon01:11211', 'carbon02:11211', 'carbon03:11211', graphite_additional_memcache_hosts="" ## list of servers MUST INCLUDE TRAILING comma - don't ever use loopback => 'carbon01:7001', 'carbon02:7001', 'carbon03:7001', graphite_additional_cluster_servers="" ## whether or not to use relaying relay_enabled='false' #number of seconds to use between flushing, default of 10, 1 for realtime statsite_flush_interval=10 # 1 seconds * 1209600 datapoints = 2 week of 1 second granularity # 10 seconds * 864000 datapoints = 2 months of 10-second granularity # 60 seconds * 259200 datapoints = 6 months of 1-minute granularity # 10 minutes * 262974 datapoints = ~3 years of 10-minute granularity graphite_retentions='1:1209600,10:864000,60:259200,600:262974' # seconds to cache in memcached, 1 for realtime, default is 60 graphite_memcache_duration=20 ##################################################################### # Fire up the EC2 instance # ##################################################################### instance_id=$(ec2-run-instances ${ami_id} -k ${EC2_KEYPAIR} -t ${instance_type} -z ${zone_id} ${security_groups} | awk '/INSTANCE/{print $2}') ec2-create-tags $instance_id --tag Name="$name" volume_id=$(ec2-create-volume -s ${volume_size_gb} -z ${zone_id} | awk '/VOLUME/{print $2}') #wait for instance and then volume, to become available while [ "$INST_ISREADY" != "running" ]; do instance_description=$(ec2-describe-instances $instance_id) INST_ISREADY=$(echo "$instance_description" | awk '/^INSTANCE/ {print $6}') if [ "$INST_ISREADY" = "running" ]; then break fi echo 'Waiting for instance to start...' sleep 5 done while [ "$VOL_ISREADY" != "available" ]; do VOL_ISREADY=$(ec2-describe-volumes $volume_id | awk '/^VOLUME/ {print $5}') if [ "$VOL_ISREADY" = "available" ]; then break fi echo 'Waiting for volume to become available...' sleep 5 done ec2-attach-volume $volume_id --instance $instance_id --device /dev/sdf #info only available after entering running state instance_host=$(echo "$instance_description" | awk '/INSTANCE/{print $4}') instance_ip=$(echo "$instance_description" | awk '/INSTANCE/{print $15}') echo "Created instance ${instance_id} / ${instance_host} / ${instance_ip}" #mount the disk ssh -i $key_path -o StrictHostKeyChecking=no $instance_user@$instance_host < # This configuration file is read by the start-memcached script provided as # part of the Debian GNU/Linux distribution. # Run memcached as a daemon. This command is implied, and is not needed for the # daemon to run. See the README.Debian that comes with this package for more # information. -d # Log memcacheds output to /var/log/memcached logfile /var/log/memcached.log # Be verbose # -v # Be even more verbose (print client commands as well) # -vv # Start with a cap of 64 megs of memory. Its reasonable, and the daemon default # Note that the daemon will grow to this size, but does not start out holding this much # memory -m 4096 # Default connection port is 11211 -p 11211 # Run the daemon as root. The start-memcached will default to running as root if no # -u command is present in this config file -u nobody # Specify which IP address to listen on. The default is to listen on all IP addresses # This parameter is one of the only security measures that memcached has, so make sure # its listening on a firewalled interface. -l 127.0.0.1 # Limit the number of simultaneous incoming connections. The daemon default is 1024 # -c 1024 # Lock down all paged memory. Consult with the README and homepage before you do this # -k # Return error when memory is exhausted (rather than removing items) # -M # Maximize core file limit # -r ' | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /etc/memcached.conf" #rm $temp_path #################################### # /etc/statsite.conf #################################### temp_path=/tmp/statsite.conf echo "# Settings for the 'collector' which is the UDP listener [collector] host = 0.0.0.0 port = 8125 # Specify settings for the metrics 'store' which is where graphite is [store] host = 127.0.0.1 port = 2003 [flush] interval = $statsite_flush_interval " | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /etc/statsite.conf" #rm $temp_path #################################### # /opt/graphite/conf/carbon.conf #################################### line_prefix='# ' if [ "$relay_enabled" = 'true' ]; then line_prefix='' fi temp_path=/tmp/carbon.conf echo "[cache] # Configure carbon directories. # # OS environment variables can be used to tell carbon where graphite is # installed, where to read configuration from and where to write data. # # GRAPHITE_ROOT - Root directory of the graphite installation. # Defaults to ../ # GRAPHITE_CONF_DIR - Configuration directory (where this file lives). # Defaults to \$GRAPHITE_ROOT/conf/ # GRAPHITE_STORAGE_DIR - Storage directory for whipser/rrd/log/pid files. # Defaults to \$GRAPHITE_ROOT/storage/ # # To change other directory paths, add settings to this file. The following # configuration variables are available with these default values: # # STORAGE_DIR = \$GRAPHITE_STORAGE_DIR # LOCAL_DATA_DIR = STORAGE_DIR/whisper/ # WHITELISTS_DIR = STORAGE_DIR/lists/ # CONF_DIR = STORAGE_DIR/conf/ # LOG_DIR = STORAGE_DIR/log/ # PID_DIR = STORAGE_DIR/ # # For FHS style directory structures, use: # # STORAGE_DIR = /var/lib/carbon/ # CONF_DIR = /etc/carbon/ # LOG_DIR = /var/log/carbon/ # PID_DIR = /var/run/ # LOCAL_DATA_DIR = $volume_mount_point #See /opt/graphite/webapp/local_settings.py # Specify the user to drop privileges to # If this is blank carbon runs as the user that invokes it # This user must have write access to the local data directory USER = # Limit the size of the cache to avoid swapping or becoming CPU bound. # Sorts and serving cache queries gets more expensive as the cache grows. # Use the value \"inf\" (infinity) for an unlimited cache size. MAX_CACHE_SIZE = inf # Limits the number of whisper update_many() calls per second, which effectively # means the number of write requests sent to the disk. This is intended to # prevent over-utilizing the disk and thus starving the rest of the system. # When the rate of required updates exceeds this, then carbon's caching will # take effect and increase the overall throughput accordingly. MAX_UPDATES_PER_SECOND = 1000 # Softly limits the number of whisper files that get created each minute. # Setting this value low (like at 50) is a good way to ensure your graphite # system will not be adversely impacted when a bunch of new metrics are # sent to it. The trade off is that it will take much longer for those metrics' # database files to all get created and thus longer until the data becomes usable. # Setting this value high (like \"inf\" for infinity) will cause graphite to create # the files quickly but at the risk of slowing I/O down considerably for a while. MAX_CREATES_PER_MINUTE = 50 LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2003 # Set this to True to enable the UDP listener. By default this is off # because it is very common to run multiple carbon daemons and managing # another (rarely used) port for every carbon instance is not fun. ENABLE_UDP_LISTENER = False UDP_RECEIVER_INTERFACE = 0.0.0.0 UDP_RECEIVER_PORT = 2003 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2004 # Per security concerns outlined in Bug #817247 the pickle receiver # will use a more secure and slightly less efficient unpickler. # Set this to True to revert to the old-fashioned insecure unpickler. USE_INSECURE_UNPICKLER = False CACHE_QUERY_INTERFACE = 0.0.0.0 CACHE_QUERY_PORT = 7002 # Set this to False to drop datapoints received after the cache # reaches MAX_CACHE_SIZE. If this is True (the default) then sockets # over which metrics are received will temporarily stop accepting # data until the cache size falls below 95% MAX_CACHE_SIZE. USE_FLOW_CONTROL = True # By default, carbon-cache will log every whisper update. This can be excessive and # degrade performance if logging on the same volume as the whisper data is stored. LOG_UPDATES = False # On some systems it is desirable for whisper to write synchronously. # Set this option to True if youd like to try this. Basically it will # shift the onus of buffering writes from the kernel into carbons cache. WHISPER_AUTOFLUSH = False # Enable AMQP if you want to receve metrics using an amqp broker # ENABLE_AMQP = False # Verbose means a line will be logged for every metric received # useful for testing # AMQP_VERBOSE = False # AMQP_HOST = localhost # AMQP_PORT = 5672 # AMQP_VHOST = / # AMQP_USER = guest # AMQP_PASSWORD = guest # AMQP_EXCHANGE = graphite # AMQP_METRIC_NAME_IN_BODY = False # The manhole interface allows you to SSH into the carbon daemon # and get a python interpreter. BE CAREFUL WITH THIS! If you do # something like time.sleep() in the interpreter, the whole process # will sleep! This is *extremely* helpful in debugging, assuming # you are familiar with the code. If you are not, please dont # mess with this, you are asking for trouble :) # # ENABLE_MANHOLE = False # MANHOLE_INTERFACE = 127.0.0.1 # MANHOLE_PORT = 7222 # MANHOLE_USER = admin # MANHOLE_PUBLIC_KEY = ssh-rsa AAAAB3NzaC1yc2EAAAABiwAaAIEAoxN0sv/e4eZCPpi3N3KYvyzRaBaMeS2RsOQ/cDuKv11dlNzVeiyc3RFmCv5Rjwn/lQ79y0zyHxw67qLyhQ/kDzINc4cY41ivuQXm2tPmgvexdrBv5nsfEpjs3gLZfJnyvlcVyWK/lId8WUvEWSWHTzsbtmXAF2raJMdgLTbQ8wE= # Patterns for all of the metrics this machine will store. Read more at # http://en.wikipedia.org/wiki/Advanced_Message_Queuing_Protocol#Bindings # # Example: store all sales, linux servers, and utilization metrics # BIND_PATTERNS = sales.#, servers.linux.#, #.utilization # # Example: store everything # BIND_PATTERNS = # # To configure special settings for the 'foo' carbon-cache instance, uncomment this: #[cache:foo] #LINE_RECEIVER_PORT = 2103 #PICKLE_RECEIVER_PORT = 2104 #CACHE_QUERY_PORT = 7102 # and any other settings you want to customize, defaults are inherited # from [carbon] section. [relay] LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2013 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2014 # To use consistent hashing instead of the user defined relay-rules.conf, # change this to: # RELAY_METHOD = consistent-hashing RELAY_METHOD = rules # If you use consistent-hashing you may want to add redundancy # of your data by replicating every datapoint to more than # one machine. REPLICATION_FACTOR = 1 # This is a list of carbon daemons we will send any relayed or # generated metrics to. The default provided would send to a single # carbon-cache instance on the default port. However if you # use multiple carbon-cache instances then it would look like this: # # DESTINATIONS = 127.0.0.1:2004:a, 127.0.0.1:2104:b # # The general form is IP:PORT:INSTANCE where the :INSTANCE part is # optional and refers to the 'None' instance if omitted. # # Note that if the destinations are all carbon-caches then this should # exactly match the webapp's CARBONLINK_HOSTS setting in terms of # instances listed (order matters!). DESTINATIONS = $carbon_relay_destinations # This defines the maximum 'message size' between carbon daemons. # You shouldn't need to tune this unless you really know what you're doing. MAX_DATAPOINTS_PER_MESSAGE = 500 MAX_QUEUE_SIZE = 10000 # Set this to False to drop datapoints when any send queue (sending datapoints # to a downstream carbon daemon) hits MAX_QUEUE_SIZE. If this is True (the # default) then sockets over which metrics are received will temporarily stop accepting # data until the send queues fall below 80% MAX_QUEUE_SIZE. USE_FLOW_CONTROL = True [aggregator] LINE_RECEIVER_INTERFACE = 0.0.0.0 LINE_RECEIVER_PORT = 2023 PICKLE_RECEIVER_INTERFACE = 0.0.0.0 PICKLE_RECEIVER_PORT = 2024 DESTINATION_HOST = 127.0.0.1 DESTINATION_PORT = 2004 MAX_QUEUE_SIZE = 10000 # This defines the maximum 'message size' between carbon daemons. # You shouldnt need to tune this unless you really know what youre doing. MAX_DATAPOINTS_PER_MESSAGE = 500 " | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /opt/graphite/conf/carbon.conf" #rm $temp_path #################################### # /opt/graphite/conf/dashboard.conf #################################### temp_path=/tmp/dashboard.conf echo '# This configuration file controls the behavior of the Dashboard UI, available # at http://my-graphite-server/dashboard/. # # This file must contain a [ui] section that defines the following settings: # [ui] default_graph_width = 400 default_graph_height = 250 automatic_variants = true refresh_interval = 60 # # These settings apply to the UI as a whole, all other sections in this file # pertain only to specific metric types. # # The dashboard presents only metrics that fall into specified naming schemes # defined in this file. This creates a simpler, more targetted view of the # data. The general form for defining a naming scheme is as follows: # #[Metric Type] #scheme = basis.path... #field1.label = Foo #field2.label = Bar # # # Where each will be displayed as a dropdown box # in the UI and the remaining portion of the namespace # shown in the Metric Selector panel. The .label options set the labels # displayed for each dropdown. # # For example: # #[Sales] #scheme = sales... #channel.label = Channel #type.label = Product Type #brand.label = Brand # # This defines a "Sales" metric type that uses 3 dropdowns in the Context Selector # (the upper-left panel) while any deeper metrics (per-product counts or revenue, etc) # will be available in the Metric Selector (upper-right panel). ' | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /opt/graphite/conf/dashboard.conf" #rm $temp_path #################################### # /opt/graphite/conf/relay-rules.conf #################################### # ONLY USED with clustering / multiple back-ends # temp_path=/tmp/relay-rules.conf echo "[default] default = true servers = $carbon_cache_servers " | tee $temp_path if [ "$relay_enabled" = "true" ]; then scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /opt/graphite/conf/relay-rules.conf" fi #rm $temp_path #################################### # /opt/graphite/conf/storage-schemas.conf #################################### temp_path=/tmp/storage-schemas.conf echo "[stats] priority = 110 pattern = .* retentions = $graphite_retentions #realtime # 1 seconds * 1209600 datapoints = 2 week of 1 second granularity # 10 seconds * 864000 datapoints = 2 months of 10-second granularity # 60 seconds * 259200 datapoints = 6 months of 1-minute granularity # 10 minutes * 262974 datapoints = ~3 years of 10-minute granularity #retentions='1:1209600,10:864000,60:259200,600:262974' #standard # 10 seconds * 2160 datapoints = 6 hours of 10-second granularity # 60 seconds * 100080 datapoints = 1 week of 1-minute granularity # 10 minutes * 262974 datapoints = ~3 years of 10-minute granularity " | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /opt/graphite/conf/storage-schemas.conf" #rm $temp_path #################################### # /opt/graphite/webapp/graphite/local_settings.py #################################### temp_path=/tmp/local_settings.py echo "# Edit this file to override the default graphite settings, do not edit settings.py!!! STORAGE_DIR = '$volume_mount_point' #See /opt/graphite/conf/carbon.conf (purposely without /whisper/) # Turn on debugging and restart apache if you ever see an \"Internal Server Error\" page #DEBUG = True # Set your local timezone (django will *try* to figure this out automatically) # If your graphs appear to be offset by a couple hours then this probably # needs to be explicitly set to your local timezone. #TIME_ZONE = 'America/Los_Angeles' # Uncomment these to enable more performance-related logging #LOG_RENDERING_PERFORMANCE = True #LOG_CACHE_PERFORMANCE = True # Override this if you need to provide documentation specific to your graphite deployment #DOCUMENTATION_URL = \"http://wiki.mycompany.com/graphite\" # Enable email-related features #SMTP_SERVER = \"mail.mycompany.com\" ##################################### # LDAP Authentication Configuration # ##################################### # LDAP / ActiveDirectory authentication setup #USE_LDAP_AUTH = True #LDAP_SERVER = \"ldap.mycompany.com\" #LDAP_PORT = 389 # OR #LDAP_URI = \"ldaps://ldap.mycompany.com:636\" #LDAP_SEARCH_BASE = \"OU=users,DC=mycompany,DC=com\" #LDAP_BASE_USER = \"CN=some_readonly_account,DC=mycompany,DC=com\" #LDAP_BASE_PASS = \"readonly_account_password\" #LDAP_USER_QUERY = \"(username=%s)\" #For Active Directory use \"(sAMAccountName=%s)\" # # If you want to further customize the ldap connection options you should # directly use ldap.set_option to set the ldap module's global options. # For example: # #import ldap #ldap.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_ALLOW) #ldap.set_option(ldap.OPT_X_TLS_CACERTDIR, \"/etc/ssl/ca\") #ldap.set_option(ldap.OPT_X_TLS_CERTFILE, \"/etc/ssl/mycert.pem\") #ldap.set_option(ldap.OPT_X_TLS_KEYFILE, \"/etc/ssl/mykey.pem\") # See http://www.python-ldap.org/ for further details on these options. ########################## # Database Configuration # ########################## # By default sqlite is used. If you cluster multiple webapps you will need # to setup an external database (like mysql) and configure all the webapps # to use the same database. Note that this database is only used to store # django models like saved graphs, dashboards, user preferences, etc. Metric # data is not stored here. # # DON'T FORGET TO RUN 'manage.py syncdb' AFTER SETTING UP A NEW DB! # #DATABASE_ENGINE = 'mysql' # or 'postgres' #DATABASE_NAME = 'graphite' #DATABASE_USER = 'graphite' #DATABASE_PASSWORD = 'graphite-is-awesome' #DATABASE_HOST = 'mysql.mycompany.com' #DATABASE_PORT = '3306' ######################### # Cluster Configuration # ######################### # (To avoid excessive DNS lookups you want to stick to using IP addresses only in this entire section) # # This should list the IP address (and optionally port) of each webapp in your cluster. # Strings are of the form \"ip[:port]\" # Usually this will be the same as MEMCACHE_HOSTS except for the port numbers. # #CLUSTER_SERVERS = [] CLUSTER_SERVERS = [${graphite_additional_cluster_servers}'${instance_ip}:7001'] # This lists all the memcached servers that will be used by this webapp. # If you have a cluster of webapps you want to make sure all of them # have the *exact* same value for this setting. That will maximize cache # efficiency. Setting MEMCACHE_HOSTS to be empty will turn off use of # memcached entirely. # # You should not use the loopback address 127.0.0.1 here because every webapp in # the cluster should use the exact same value and should list every member in the # cluster. #MEMCACHE_HOSTS = ['10.10.10.10:11211', '10.10.10.11:11211', '10.10.10.12:11211'] MEMCACHE_HOSTS = [${graphite_additional_memcache_hosts}'${instance_ip}:11211'] MEMCACHE_DURATION = $graphite_memcache_duration # If you are running multiple carbon-caches on this machine (typically behind a relay using # consistent hashing), you'll need to list the ip address, cache query port, and instance name of each carbon-cache # instance on the local machine (NOT every carbon-cache in the entire cluster). The default cache query port is 7002 # and a common scheme is to use 7102 for instance b, 7202 for instance c, etc. # # You *should* use 127.0.0.1 here. #CARBONLINK_HOSTS = [\"127.0.0.1:7002:a\", \"127.0.0.1:7102:b\", \"127.0.0.1:7202:c\"] CARBONLINK_HOSTS = [\"127.0.0.1:7002\"] " | tee $temp_path scp -i $key_path $temp_path $instance_user@$instance_host:$temp_path ssh -i $key_path $instance_user@$instance_host "sudo mv $temp_path /opt/graphite/webapp/graphite/local_settings.py" #rm $temp_path #################################### # CREATE GRAPHITE-WEB DATABASE # START supervisord (which starts carbon, graphite, etc) # ENSURE PROCESSES DIDNT BLOW #################################### ssh -i $key_path $instance_user@$instance_host <