
@gauravds forked this gist from quiver/athena.py (created February 26, 2020 10:36)

Revisions

  1. @quiver revised this gist Jul 8, 2019. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions athena.py
    @@ -10,8 +10,8 @@
     $ ls -1
     athena.log # program log
     athena.py # main program
    -foo.sql # query execution result
    -foo.sql.csv # sql output
    +foo.sql # sql
    +foo.sql.csv # query result

     $ cat foo.sql.csv # check query result
     "_col0"
  2. @quiver revised this gist Feb 27, 2018. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion athena.py
    @@ -32,7 +32,7 @@
     S3BUCKET_NAME = 'XXX'
     DATABASE_NAME = 'YYY'
    -S3BUCKET_NAME = 'aws-athena-query-results-018221336085-ap-northeast-1'
    +S3BUCKET_NAME = 'aws-athena-query-results-123456789012-ap-northeast-1'
     DATABASE_NAME = 'trid_381'
     @retry(stop_max_attempt_number = 10,
  3. @quiver created this gist Jan 27, 2018.
    93 changes: 93 additions & 0 deletions athena.py
    @@ -0,0 +1,93 @@
    #!/usr/bin/env python
    # vim: set fileencoding=utf8 :
    '''
    $ pip install -U boto3 retrying
    $ export AWS_DEFAULT_PROFILE=test
    $ cat foo.sql
    select count(*)
    from bar
    $ python athena.py foo.sql
    $ ls -1
    athena.log # program log
    athena.py # main program
    foo.sql # query execution result
    foo.sql.csv # sql output

    $ cat foo.sql.csv # check query result
    "_col0"
    "1234"
    '''
    import logging
    import pprint
    import sys

    import boto3
    from retrying import retry

    logging.basicConfig(filename='athena.log', level=logging.INFO)

    athena = boto3.client('athena')
    s3 = boto3.resource('s3')

    S3BUCKET_NAME = 'XXX'
    DATABASE_NAME = 'YYY'
    S3BUCKET_NAME = 'aws-athena-query-results-018221336085-ap-northeast-1'
    DATABASE_NAME = 'trid_381'

    # retry with exponential backoff: up to 10 attempts, waits capped at 10 minutes
    @retry(stop_max_attempt_number = 10,
           wait_exponential_multiplier = 30 * 1000,
           wait_exponential_max = 10 * 60 * 1000)
    def poll_status(_id):
        '''
        poll query status
        '''
        result = athena.get_query_execution(
            QueryExecutionId = _id
        )

        logging.info(pprint.pformat(result['QueryExecution']))

        state = result['QueryExecution']['Status']['State']
        if state == 'SUCCEEDED':
            return result
        elif state == 'FAILED':
            return result
        else:
            # still QUEUED or RUNNING: raise so that @retry polls again
            raise Exception

    def query_to_athena(filename):
        sql = open(filename, 'r').read()

        # start the query; Athena writes the result CSV to
        # s3://<S3BUCKET_NAME>/<QueryExecutionId>.csv
        result = athena.start_query_execution(
            QueryString = sql,
            QueryExecutionContext = {
                'Database': DATABASE_NAME
            },
            ResultConfiguration = {
                'OutputLocation': 's3://' + S3BUCKET_NAME,
            }
        )

        logging.info(pprint.pformat(result))

        QueryExecutionId = result['QueryExecutionId']
        result = poll_status(QueryExecutionId)

        # save response
        with open(filename + '.log', 'w') as f:
            f.write(pprint.pformat(result, indent = 4))

        # save query result from S3
        if result['QueryExecution']['Status']['State'] == 'SUCCEEDED':
            s3_key = QueryExecutionId + '.csv'
            local_filename = filename + '.csv'
            s3.Bucket(S3BUCKET_NAME).download_file(s3_key, local_filename)

    def main():
        for filename in sys.argv[1:]:
            try:
                query_to_athena(filename)
            except Exception, err:  # note: Python 2 except/print syntax
                print err

    if __name__ == '__main__':
        main()
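
As a side note (not part of the gist): instead of downloading the CSV that Athena writes to S3, the rows of a finished query can also be read back through the Athena API itself with get_query_results. A minimal sketch, assuming boto3 is configured as above and query_execution_id refers to a query that has already SUCCEEDED; fetch_rows is a hypothetical helper, not something defined in the gist:

    import boto3

    athena = boto3.client('athena')

    def fetch_rows(query_execution_id):
        '''Return all rows of a finished query as lists of strings (first row is the header).'''
        rows = []
        kwargs = {'QueryExecutionId': query_execution_id}
        while True:
            page = athena.get_query_results(**kwargs)
            for row in page['ResultSet']['Rows']:
                # each cell looks like {'VarCharValue': '1234'}; the key is absent for NULL values
                rows.append([cell.get('VarCharValue') for cell in row['Data']])
            if 'NextToken' not in page:
                return rows
            kwargs['NextToken'] = page['NextToken']

This avoids the extra S3 download for small result sets; the gist's approach of pulling <QueryExecutionId>.csv straight from the output bucket is likely more convenient for large results, since get_query_results returns rows in pages.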