Skip to content

Instantly share code, notes, and snippets.

@dgtm
Created September 9, 2021 11:46
Show Gist options
  • Select an option

  • Save dgtm/34a7c089a0e0ced41c125917c65cc1e4 to your computer and use it in GitHub Desktop.

Select an option

Save dgtm/34a7c089a0e0ced41c125917c65cc1e4 to your computer and use it in GitHub Desktop.

Revisions

  1. dgtm created this gist Sep 9, 2021.
    276 changes: 276 additions & 0 deletions cffile
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,276 @@
    # CloudFormation template that wires a Cost and Usage Report (CUR) S3 prefix
    # to Athena: a Glue database, a Glue crawler over the report prefix, a Lambda
    # that starts the crawler, and an S3 bucket notification that invokes that
    # Lambda when new report objects are created.
    AWSTemplateFormatVersion: 2010-09-09
    # Every key below this line is a logical resource of the stack.
    Resources:

    # Glue Data Catalog database that holds the tables created for the report.
    # It is registered in the catalog of the account that deploys the stack.
    AWSCURDatabase:
      Type: AWS::Glue::Database
      Properties:
        CatalogId:
          Ref: AWS::AccountId
        DatabaseInput:
          Name: athenacurcfn_myreport



    # IAM role assumed by the Glue service; AWSCURCrawler runs under it.
    # Combines the AWSGlueServiceRole managed policy with an inline policy for
    # logging, catalog updates, and read/write access to the report prefix.
    AWSCURCrawlerComponentFunction:
      Type: AWS::IAM::Role
      Properties:
        Path: /
        # Trust policy: only glue.amazonaws.com may assume this role.
        AssumeRolePolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Action: ['sts:AssumeRole']
              Principal:
                Service: [glue.amazonaws.com]
        ManagedPolicyArns:
          - Fn::Sub: 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole'
        Policies:
          - PolicyName: AWSCURCrawlerComponentFunction
            PolicyDocument:
              Version: 2012-10-17
              Statement:
                # CloudWatch Logs access (any region, account, and log group).
                - Effect: Allow
                  Resource:
                    Fn::Sub: 'arn:${AWS::Partition}:logs:*:*:*'
                  Action:
                    - 'logs:CreateLogGroup'
                    - 'logs:CreateLogStream'
                    - 'logs:PutLogEvents'
                # Catalog writes; note these are granted on every Glue resource.
                - Effect: Allow
                  Resource: '*'
                  Action:
                    - 'glue:UpdateDatabase'
                    - 'glue:UpdatePartition'
                    - 'glue:CreateTable'
                    - 'glue:UpdateTable'
                    - 'glue:ImportCatalogToGlue'
                # Object access limited to keys under the report prefix.
                - Effect: Allow
                  Resource:
                    Fn::Sub: 'arn:${AWS::Partition}:s3:::dipeshgtmreport/dipeshgtm/myreport/myreport*'
                  Action:
                    - 's3:GetObject'
                    - 's3:PutObject'


    # Execution role for the AWSCURInitializer Lambda function.
    # Allows writing CloudWatch Logs and starting the CUR crawler.
    AWSCURCrawlerLambdaExecutor:
      Type: 'AWS::IAM::Role'
      Properties:
        # Trust policy: only the Lambda service may assume this role.
        AssumeRolePolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Principal:
                Service:
                  - lambda.amazonaws.com
              Action:
                - 'sts:AssumeRole'
        Path: /
        Policies:
          - PolicyName: AWSCURCrawlerLambdaExecutor
            PolicyDocument:
              Version: 2012-10-17
              Statement:
                - Effect: Allow
                  Action:
                    - 'logs:CreateLogGroup'
                    - 'logs:CreateLogStream'
                    - 'logs:PutLogEvents'
                  Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
                # Least privilege: the function only ever starts the crawler
                # named 'AWSCURCrawler-myreport' (the name hard-coded in its
                # handler), so the grant is limited to that crawler's ARN
                # instead of every crawler in the account.
                - Effect: Allow
                  Action:
                    - 'glue:StartCrawler'
                  Resource: !Sub 'arn:${AWS::Partition}:glue:${AWS::Region}:${AWS::AccountId}:crawler/AWSCURCrawler-myreport'


    # Glue crawler over the report prefix. No Schedule is configured here; runs
    # are started on demand by the AWSCURInitializer Lambda function.
    AWSCURCrawler:
      Type: AWS::Glue::Crawler
      DependsOn:
        - AWSCURDatabase
        - AWSCURCrawlerComponentFunction
      Properties:
        Name: AWSCURCrawler-myreport
        Description: A recurring crawler that keeps your CUR table in Athena up-to-date.
        DatabaseName:
          Ref: AWSCURDatabase
        Role:
          Fn::GetAtt: [AWSCURCrawlerComponentFunction, Arn]
        # Schema changes are applied to the catalog; tables whose data has
        # disappeared are removed from it.
        SchemaChangePolicy:
          DeleteBehavior: DELETE_FROM_DATABASE
          UpdateBehavior: UPDATE_IN_DATABASE
        Targets:
          S3Targets:
            - Path: s3://dipeshgtmreport/dipeshgtm/myreport/myreport
              # Objects with these extensions are skipped by the crawler.
              Exclusions: ['**.json', '**.yml', '**.sql', '**.csv', '**.gz', '**.zip']


    # Lambda function that starts the CUR crawler. It is invoked in two ways:
    #   * as a CloudFormation custom resource (the event carries ResponseURL),
    #   * by S3 ObjectCreated notifications (no ResponseURL).
    # NOTE(review): nodejs12.x is past end-of-support on Lambda. Moving to a
    # current runtime also requires porting the handler off the bundled
    # aws-sdk v2 - confirm the target runtime before deploying.
    AWSCURInitializer:
      Type: 'AWS::Lambda::Function'
      DependsOn: AWSCURCrawler
      Properties:
        Handler: 'index.handler'
        Timeout: 30
        Runtime: nodejs12.x
        ReservedConcurrentExecutions: 1
        Role: !GetAtt AWSCURCrawlerLambdaExecutor.Arn
        Code:
          # Literal block scalar (|) keeps the source line-for-line; a folded
          # scalar (>) joins same-indent lines, which breaks on any // comment.
          ZipFile: |
            const AWS = require('aws-sdk');
            const response = require('./cfn-response');

            exports.handler = function (event, context, callback) {
              // Nothing to undo when the custom resource is deleted.
              if (event.RequestType === 'Delete') {
                response.send(event, context, response.SUCCESS);
                return;
              }

              // Custom-resource invocations must always be answered through
              // the pre-signed ResponseURL, otherwise the stack operation
              // waits until it times out.
              const fromCloudFormation = Boolean(event.ResponseURL);
              const glue = new AWS.Glue();

              glue.startCrawler({ Name: 'AWSCURCrawler-myreport' }, function (err) {
                if (!err) {
                  if (fromCloudFormation) {
                    response.send(event, context, response.SUCCESS);
                  } else {
                    callback(null, response.SUCCESS);
                  }
                  return;
                }

                // A crawl already in progress will pick up the new data, so
                // it is reported as success to either caller.
                if (err.code === 'CrawlerRunningException') {
                  if (fromCloudFormation) {
                    response.send(event, context, response.SUCCESS, { msg: err.message });
                  } else {
                    callback(null, err.message);
                  }
                  return;
                }

                // Build the failure detail from the SDK error fields instead
                // of parsing the raw HTTP body, which may be empty or
                // non-JSON (for example on network errors).
                const detail = JSON.stringify({ code: err.code, message: err.message });
                if (fromCloudFormation) {
                  response.send(event, context, response.FAILED, { msg: detail });
                } else {
                  callback(detail);
                }
              });
            };


    # Custom resource backed by the AWSCURInitializer function; CloudFormation
    # invokes that function when this resource is created or deleted.
    AWSStartCURCrawler:
      Type: Custom::AWSStartCURCrawler
      Properties:
        ServiceToken:
          Fn::GetAtt: [AWSCURInitializer, Arn]


    # Resource-based permission letting S3 invoke AWSCURInitializer, restricted
    # to the report bucket and to buckets owned by this account.
    AWSS3CUREventLambdaPermission:
      Type: AWS::Lambda::Permission
      Properties:
        FunctionName:
          Fn::GetAtt: [AWSCURInitializer, Arn]
        Action: lambda:InvokeFunction
        Principal: s3.amazonaws.com
        SourceArn:
          Fn::Sub: 'arn:${AWS::Partition}:s3:::dipeshgtmreport'
        SourceAccount:
          Ref: AWS::AccountId


    # Execution role for the AWSS3CURNotification Lambda function: CloudWatch
    # Logs access plus permission to set the report bucket's notifications.
    AWSS3CURLambdaExecutor:
      Type: AWS::IAM::Role
      Properties:
        Path: /
        # Trust policy: only the Lambda service may assume this role.
        AssumeRolePolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Action: ['sts:AssumeRole']
              Principal:
                Service: [lambda.amazonaws.com]
        Policies:
          - PolicyName: AWSS3CURLambdaExecutor
            PolicyDocument:
              Version: 2012-10-17
              Statement:
                - Effect: Allow
                  Resource:
                    Fn::Sub: 'arn:${AWS::Partition}:logs:*:*:*'
                  Action:
                    - 'logs:CreateLogGroup'
                    - 'logs:CreateLogStream'
                    - 'logs:PutLogEvents'
                # Scoped to the single report bucket.
                - Effect: Allow
                  Resource:
                    Fn::Sub: 'arn:${AWS::Partition}:s3:::dipeshgtmreport'
                  Action: ['s3:PutBucketNotification']


    # Lambda function behind the AWSPutS3CURNotification custom resource. It
    # sets the notification configuration of the bucket named in the resource
    # properties so that ObjectCreated events under ReportKey invoke
    # TargetLambdaArn.
    # NOTE(review): putBucketNotificationConfiguration replaces the bucket's
    # whole notification configuration; on Delete an empty configuration is
    # written. Confirm the bucket has no other notifications that must survive.
    # NOTE(review): nodejs12.x is past end-of-support on Lambda; upgrading also
    # requires porting off the bundled aws-sdk v2 - confirm before deploying.
    AWSS3CURNotification:
      Type: 'AWS::Lambda::Function'
      DependsOn:
        - AWSCURInitializer
        - AWSS3CUREventLambdaPermission
        - AWSS3CURLambdaExecutor
      Properties:
        Handler: 'index.handler'
        Timeout: 30
        Runtime: nodejs12.x
        ReservedConcurrentExecutions: 1
        Role: !GetAtt AWSS3CURLambdaExecutor.Arn
        Code:
          # Literal block scalar (|) keeps the source line-for-line; a folded
          # scalar (>) joins same-indent lines, which breaks on any // comment.
          ZipFile: |
            const AWS = require('aws-sdk');
            const response = require('./cfn-response');

            exports.handler = function (event, context, callback) {
              const s3 = new AWS.S3();
              const props = event.ResourceProperties;

              // Single failure path: log first so the detail is in CloudWatch
              // even if reporting fails, then tell CloudFormation and Lambda.
              const fail = function (error) {
                console.log(error);
                response.send(event, context, response.FAILED, error);
                callback(error);
              };

              // Delete sends an empty configuration; create/update installs
              // one Lambda target filtered by the report key prefix.
              const notificationConfiguration = {};
              if (event.RequestType !== 'Delete') {
                // Fail with a readable reason instead of sending a
                // placeholder ARN to S3 and surfacing an opaque API error.
                if (!props.TargetLambdaArn) {
                  fail({ msg: 'TargetLambdaArn is required to configure the bucket notification' });
                  return;
                }
                notificationConfiguration.LambdaFunctionConfigurations = [{
                  Events: ['s3:ObjectCreated:*'],
                  LambdaFunctionArn: props.TargetLambdaArn,
                  Filter: { Key: { FilterRules: [{ Name: 'prefix', Value: props.ReportKey }] } },
                }];
              }

              const putConfig = new Promise(function (resolve, reject) {
                s3.putBucketNotificationConfiguration({
                  Bucket: props.BucketName,
                  NotificationConfiguration: notificationConfiguration,
                }, function (err, data) {
                  // Must stay a `function`: the raw HTTP body is read from
                  // the `this` the SDK binds for this callback.
                  if (err) reject({ msg: this.httpResponse.body.toString(), error: err, data: data });
                  else resolve(data);
                });
              });

              putConfig.then(function (result) {
                response.send(event, context, response.SUCCESS, result);
                callback(null, result);
              }).catch(fail);
            };


    # Custom resource that runs AWSS3CURNotification. The remaining properties
    # are delivered to the function as event.ResourceProperties.
    AWSPutS3CURNotification:
      Type: Custom::AWSPutS3CURNotification
      Properties:
        ServiceToken:
          Fn::GetAtt: [AWSS3CURNotification, Arn]
        # Bucket and key prefix whose ObjectCreated events should fire.
        BucketName: dipeshgtmreport
        ReportKey: dipeshgtm/myreport/myreport
        # Function that S3 invokes for those events.
        TargetLambdaArn:
          Fn::GetAtt: [AWSCURInitializer, Arn]


    # External Glue table with a single string column `status`, stored as
    # Parquet under the cost_and_usage_data_status/ prefix of the report bucket.
    AWSCURReportStatusTable:
      Type: 'AWS::Glue::Table'
      Properties:
        # Reference the database resource instead of repeating its literal
        # name, matching how AWSCURCrawler refers to it. The reference also
        # gives CloudFormation the creation-order dependency, so no explicit
        # DependsOn is needed.
        DatabaseName: !Ref AWSCURDatabase
        CatalogId: !Ref AWS::AccountId
        TableInput:
          Name: 'cost_and_usage_data_status'
          TableType: 'EXTERNAL_TABLE'
          StorageDescriptor:
            Columns:
              - Name: status
                Type: 'string'
            InputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
            OutputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
            SerdeInfo:
              SerializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
            Location: 's3://dipeshgtmreport/dipeshgtm/myreport/cost_and_usage_data_status/'