Skip to content

Instantly share code, notes, and snippets.

@prithvi514
Forked from amalgjose/EmrLauncher
Created March 2, 2017 00:44
Show Gist options
  • Save prithvi514/03fcc4b5a0199e73a9cf96e2b5b1eb99 to your computer and use it in GitHub Desktop.
Save prithvi514/03fcc4b5a0199e73a9cf96e2b5b1eb99 to your computer and use it in GitHub Desktop.

Revisions

  1. @amalgjose amalgjose revised this gist Jan 29, 2015. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -105,10 +105,10 @@ class EmrLauncher(object):

    def main(self):
    try:
    master_type = 'm3.large'
    slave_type = 'm3.large'
    master_type = 'm3.xlarge'
    slave_type = 'm3.xlarge'
    num_instance = 3
    ami_version = '3.1.1'
    ami_version = '2.4.8'

    emr_status = self.launch_emr_cluster(master_type, slave_type, num_instance, ami_version)
    if emr_status == 'SUCCESS':
  2. @amalgjose amalgjose revised this gist Jan 29, 2015. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -15,12 +15,12 @@ class EmrLauncher(object):
    # Default constructor of the class.
    def __init__(self):
    try:
    self.zone_name = "us-west-2"
    self.zone_name = "ap-southeast-1"
    self.access_key = "xxxxxx"
    self.private_key = "xxxxxxx"
    self.ec2_keyname = "xxxxxxxx"
    self.base_bucket = "s3://emr-bucket/"
    self.bootstrap_script = "bootstrap.sh"
    self.bootstrap_script = "custom-bootstrap.sh"
    self.log_dir = "Logs"
    self.emr_status_wait = 20
    self.conn = ""
    @@ -39,10 +39,10 @@ class EmrLauncher(object):
    def launch_emr_cluster(self, master_type, slave_type, num_instance, ami_version):
    try:
    #Custom Bootstrap step
    bootstrap_step = BootstrapAction("BootStrap", self.bootstrap_script_name, None)
    bootstrap_step = BootstrapAction("CustomBootStrap", self.bootstrap_script_name, None)

    #Modifying block size to 64 MB
    block_size_conf = 'dfs.block.size=128'
    #Modifying block size to 256 MB
    block_size_conf = 'dfs.block.size=256'
    hadoop_config_params = ['-h', block_size_conf, '-h']
    hadoop_config_bootstrapper = BootstrapAction('hadoop-config',
    's3://elasticmapreduce/bootstrap-actions/configure-hadoop',
  3. @amalgjose amalgjose revised this gist Oct 15, 2014. 1 changed file with 4 additions and 2 deletions.
    6 changes: 4 additions & 2 deletions EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -34,7 +34,8 @@ class EmrLauncher(object):
    self.log_bucket_name = self.base_bucket + self.log_dir
    self.bootstrap_script_name = self.base_bucket + self.bootstrap_script


    #Method for launching the EMR cluster

    def launch_emr_cluster(self, master_type, slave_type, num_instance, ami_version):
    try:
    #Custom Bootstrap step
    @@ -100,7 +101,8 @@ class EmrLauncher(object):
    logging.error("Launching EMR cluster failed")
    return "FAILED"


    #Main method of the program

    def main(self):
    try:
    master_type = 'm3.large'
  4. @amalgjose amalgjose revised this gist Oct 15, 2014. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -118,7 +118,6 @@ class EmrLauncher(object):


    if __name__ == '__main__':

    launcher = EmrLauncher()
    launcher.main()

  5. @amalgjose amalgjose revised this gist Oct 15, 2014. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -8,6 +8,7 @@ from boto.emr.step import InstallHiveStep
    from boto.emr.step import InstallPigStep
    from boto.regioninfo import RegionInfo

    #Program for launching an EMR cluster

    class EmrLauncher(object):

    @@ -30,10 +31,10 @@ class EmrLauncher(object):
    region=RegionInfo(name=self.zone_name,
    endpoint=self.zone_name + '.elasticmapreduce.amazonaws.com'))


    self.log_bucket_name = self.base_bucket + self.log_dir
    self.bootstrap_script_name = self.base_bucket + self.bootstrap_script


    def launch_emr_cluster(self, master_type, slave_type, num_instance, ami_version):
    try:
    #Custom Bootstrap step
    @@ -99,6 +100,7 @@ class EmrLauncher(object):
    logging.error("Launching EMR cluster failed")
    return "FAILED"


    def main(self):
    try:
    master_type = 'm3.large'
  6. @amalgjose amalgjose created this gist Oct 15, 2014.
    122 changes: 122 additions & 0 deletions EmrLauncher
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,122 @@
    __author__ = 'Amal G Jose'

    import time
    import logging
    from boto.emr.connection import EmrConnection
    from boto.emr.bootstrap_action import BootstrapAction
    from boto.emr.step import InstallHiveStep
    from boto.emr.step import InstallPigStep
    from boto.regioninfo import RegionInfo


    class EmrLauncher(object):
        """Launch an Amazon EMR cluster through boto and wait for it to settle.

        All settings (region, credentials, S3 locations, cluster name and the
        polling interval) are hard-coded in ``__init__``; the credential and
        key-pair values there look like placeholders that have to be replaced
        before the launcher can be used.
        """

        # Job-flow states at which polling stops.  TERMINATED is part of the
        # set so that a cluster which has already gone away ends the wait
        # instead of being polled forever.
        _SETTLED_STATES = (u'WAITING', u'COMPLETED', u'SHUTTING_DOWN',
                           u'TERMINATED', u'FAILED')

        def __init__(self):
            """Store the launch settings and open the EMR connection.

            Nothing is caught here: an error raised while building the
            connection propagates to the caller.
            """
            self.zone_name = "us-west-2"
            # NOTE(review): credentials are hard-coded placeholders; confirm how
            # real keys are meant to be supplied before using this class.
            self.access_key = "xxxxxx"
            self.private_key = "xxxxxxx"
            self.ec2_keyname = "xxxxxxxx"
            self.base_bucket = "s3://emr-bucket/"
            self.bootstrap_script = "bootstrap.sh"
            self.log_dir = "Logs"
            # Seconds to sleep between two job-flow status checks.
            self.emr_status_wait = 20
            self.cluster_name = "MyFirstEmrCluster"

            # S3 locations derived from the base bucket.
            self.log_bucket_name = self.base_bucket + self.log_dir
            self.bootstrap_script_name = self.base_bucket + self.bootstrap_script

            # Establishing EmrConnection
            endpoint = self.zone_name + '.elasticmapreduce.amazonaws.com'
            self.conn = EmrConnection(
                self.access_key,
                self.private_key,
                region=RegionInfo(name=self.zone_name, endpoint=endpoint))

        def _bootstrap_actions(self):
            """Return the bootstrap actions in the order they are submitted."""
            # Hadoop configuration override for the HDFS block size.
            # NOTE(review): the original comment said "64 MB" while the value is
            # 128, and Hadoop documents dfs.block.size in bytes -- confirm the
            # intended size/unit.  The trailing '-h' has no key=value after it;
            # confirm that it is intentional.
            block_size_conf = 'dfs.block.size=128'
            hadoop_config = BootstrapAction(
                'hadoop-config',
                's3://elasticmapreduce/bootstrap-actions/configure-hadoop',
                ['-h', block_size_conf, '-h'])

            # Ganglia monitoring
            ganglia = BootstrapAction(
                'ganglia-config',
                's3://elasticmapreduce/bootstrap-actions/install-ganglia', '')

            # User supplied bootstrap script from the base bucket
            custom = BootstrapAction("BootStrap", self.bootstrap_script_name, None)

            # Impala installation
            impala = BootstrapAction(
                "ImpalaInstall",
                "s3://elasticmapreduce/libs/impala/setup-impala",
                ['--install-impala', '--base-path', 's3://elasticmapreduce',
                 '--impala-version', 'latest'])

            return [hadoop_config, ganglia, custom, impala]

        def _wait_until_settled(self, jobid):
            """Poll the job flow until its state is in ``_SETTLED_STATES``.

            Returns the job-flow description from the final poll.
            """
            jobflow = self.conn.describe_jobflow(jobid)
            while jobflow.state not in self._SETTLED_STATES:
                # sleeping to recheck for status.
                time.sleep(int(self.emr_status_wait))
                jobflow = self.conn.describe_jobflow(jobid)
            return jobflow

        def launch_emr_cluster(self, master_type, slave_type, num_instance, ami_version):
            """Start the job flow, protect it from termination and wait for it.

            Args:
                master_type: instance type passed as ``master_instance_type``.
                slave_type: instance type passed as ``slave_instance_type``.
                num_instance: value passed as ``num_instances``.
                ami_version: value passed as ``ami_version``.

            Returns:
                "SUCCESS" when the job flow reaches WAITING, "ERROR" when it
                settles in any other state, and "FAILED" when an exception is
                raised while submitting or polling the job flow.
            """
            try:
                # Launching the cluster (Hive and Pig are installed as steps)
                jobid = self.conn.run_jobflow(
                    self.cluster_name,
                    self.log_bucket_name,
                    bootstrap_actions=self._bootstrap_actions(),
                    ec2_keyname=self.ec2_keyname,
                    steps=[InstallHiveStep(), InstallPigStep()],
                    keep_alive=True,
                    action_on_failure='CANCEL_AND_WAIT',
                    master_instance_type=master_type,
                    slave_instance_type=slave_type,
                    num_instances=num_instance,
                    ami_version=ami_version)

                # Enabling the termination protection
                self.conn.set_termination_protection(jobid, True)

                # Checking the state of EMR cluster
                jobflow = self._wait_until_settled(jobid)
            except Exception:
                # "except Exception" lets KeyboardInterrupt/SystemExit through,
                # so the polling loop can still be interrupted; the traceback is
                # logged so the cause of the failure is not lost.
                logging.exception("Launching EMR cluster failed")
                return "FAILED"

            if jobflow.state == u'WAITING':
                logging.info("Launched EMR Cluster Successfully")
                # Lazy %s formatting: a missing/None DNS name must not turn a
                # successful launch into a failure.
                logging.info("Master node DNS of EMR %s",
                             getattr(jobflow, 'masterpublicdnsname', None))
                return "SUCCESS"

            # Every other settled state means the cluster is not usable.
            logging.error("Launching EMR cluster failed")
            return "ERROR"

        def main(self):
            """Launch a three-node m3.large cluster on AMI 3.1.1 and log the outcome."""
            master_type = 'm3.large'
            slave_type = 'm3.large'
            num_instance = 3
            ami_version = '3.1.1'

            try:
                emr_status = self.launch_emr_cluster(master_type, slave_type,
                                                     num_instance, ami_version)
            except Exception:
                logging.exception("Emr launching failed")
                return

            if emr_status == 'SUCCESS':
                logging.info("Emr cluster launched successfully")
            else:
                logging.error("Emr launching failed")

    if __name__ == '__main__':
        # The launcher reports success and the master DNS through
        # logging.info(); the root logger only emits WARNING and above unless
        # it is configured, so enable INFO output when run as a script.
        logging.basicConfig(level=logging.INFO)

        launcher = EmrLauncher()
        launcher.main()