Created
September 9, 2021 11:46
-
-
Save dgtm/34a7c089a0e0ced41c125917c65cc1e4 to your computer and use it in GitHub Desktop.
Revisions
-
dgtm created this gist
Sep 9, 2021 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,276 @@ AWSTemplateFormatVersion: 2010-09-09 Resources: AWSCURDatabase: Type: 'AWS::Glue::Database' Properties: DatabaseInput: Name: 'athenacurcfn_myreport' CatalogId: !Ref AWS::AccountId AWSCURCrawlerComponentFunction: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Principal: Service: - glue.amazonaws.com Action: - 'sts:AssumeRole' Path: / ManagedPolicyArns: - !Sub 'arn:${AWS::Partition}:iam::aws:policy/service-role/AWSGlueServiceRole' Policies: - PolicyName: AWSCURCrawlerComponentFunction PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - 'logs:CreateLogGroup' - 'logs:CreateLogStream' - 'logs:PutLogEvents' Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*' - Effect: Allow Action: - 'glue:UpdateDatabase' - 'glue:UpdatePartition' - 'glue:CreateTable' - 'glue:UpdateTable' - 'glue:ImportCatalogToGlue' Resource: '*' - Effect: Allow Action: - 's3:GetObject' - 's3:PutObject' Resource: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport/dipeshgtm/myreport/myreport*' AWSCURCrawlerLambdaExecutor: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Principal: Service: - lambda.amazonaws.com Action: - 'sts:AssumeRole' Path: / Policies: - PolicyName: AWSCURCrawlerLambdaExecutor PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - 'logs:CreateLogGroup' - 'logs:CreateLogStream' - 'logs:PutLogEvents' Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*' - Effect: Allow Action: - 'glue:StartCrawler' Resource: '*' AWSCURCrawler: Type: 'AWS::Glue::Crawler' DependsOn: - AWSCURDatabase - AWSCURCrawlerComponentFunction Properties: Name: AWSCURCrawler-myreport Description: A recurring crawler that keeps your CUR table in Athena up-to-date. Role: !GetAtt AWSCURCrawlerComponentFunction.Arn DatabaseName: !Ref AWSCURDatabase Targets: S3Targets: - Path: 's3://dipeshgtmreport/dipeshgtm/myreport/myreport' Exclusions: - '**.json' - '**.yml' - '**.sql' - '**.csv' - '**.gz' - '**.zip' SchemaChangePolicy: UpdateBehavior: UPDATE_IN_DATABASE DeleteBehavior: DELETE_FROM_DATABASE AWSCURInitializer: Type: 'AWS::Lambda::Function' DependsOn: AWSCURCrawler Properties: Code: ZipFile: > const AWS = require('aws-sdk'); const response = require('./cfn-response'); exports.handler = function(event, context, callback) { if (event.RequestType === 'Delete') { response.send(event, context, response.SUCCESS); } else { const glue = new AWS.Glue(); glue.startCrawler({ Name: 'AWSCURCrawler-myreport' }, function(err, data) { if (err) { const responseData = JSON.parse(this.httpResponse.body); if (responseData['__type'] == 'CrawlerRunningException') { callback(null, responseData.Message); } else { const responseString = JSON.stringify(responseData); if (event.ResponseURL) { response.send(event, context, response.FAILED,{ msg: responseString }); } else { callback(responseString); } } } else { if (event.ResponseURL) { response.send(event, context, response.SUCCESS); } else { callback(null, response.SUCCESS); } } }); } }; Handler: 'index.handler' Timeout: 30 Runtime: nodejs12.x ReservedConcurrentExecutions: 1 Role: !GetAtt AWSCURCrawlerLambdaExecutor.Arn AWSStartCURCrawler: Type: 'Custom::AWSStartCURCrawler' Properties: ServiceToken: !GetAtt AWSCURInitializer.Arn AWSS3CUREventLambdaPermission: Type: AWS::Lambda::Permission Properties: Action: 'lambda:InvokeFunction' FunctionName: !GetAtt AWSCURInitializer.Arn Principal: 's3.amazonaws.com' SourceAccount: !Ref AWS::AccountId SourceArn: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport' AWSS3CURLambdaExecutor: Type: 'AWS::IAM::Role' Properties: AssumeRolePolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Principal: Service: - lambda.amazonaws.com Action: - 'sts:AssumeRole' Path: / Policies: - PolicyName: AWSS3CURLambdaExecutor PolicyDocument: Version: 2012-10-17 Statement: - Effect: Allow Action: - 'logs:CreateLogGroup' - 'logs:CreateLogStream' - 'logs:PutLogEvents' Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*' - Effect: Allow Action: - 's3:PutBucketNotification' Resource: !Sub 'arn:${AWS::Partition}:s3:::dipeshgtmreport' AWSS3CURNotification: Type: 'AWS::Lambda::Function' DependsOn: - AWSCURInitializer - AWSS3CUREventLambdaPermission - AWSS3CURLambdaExecutor Properties: Code: ZipFile: > const AWS = require('aws-sdk'); const response = require('./cfn-response'); exports.handler = function(event, context, callback) { const s3 = new AWS.S3(); const putConfigRequest = function(notificationConfiguration) { return new Promise(function(resolve, reject) { s3.putBucketNotificationConfiguration({ Bucket: event.ResourceProperties.BucketName, NotificationConfiguration: notificationConfiguration }, function(err, data) { if (err) reject({ msg: this.httpResponse.body.toString(), error: err, data: data }); else resolve(data); }); }); }; const newNotificationConfig = {}; if (event.RequestType !== 'Delete') { newNotificationConfig.LambdaFunctionConfigurations = [{ Events: [ 's3:ObjectCreated:*' ], LambdaFunctionArn: event.ResourceProperties.TargetLambdaArn || 'missing arn', Filter: { Key: { FilterRules: [ { Name: 'prefix', Value: event.ResourceProperties.ReportKey } ] } } }]; } putConfigRequest(newNotificationConfig).then(function(result) { response.send(event, context, response.SUCCESS, result); callback(null, result); }).catch(function(error) { response.send(event, context, response.FAILED, error); console.log(error); callback(error); }); }; Handler: 'index.handler' Timeout: 30 Runtime: nodejs12.x ReservedConcurrentExecutions: 1 Role: !GetAtt AWSS3CURLambdaExecutor.Arn AWSPutS3CURNotification: Type: 'Custom::AWSPutS3CURNotification' Properties: ServiceToken: !GetAtt AWSS3CURNotification.Arn TargetLambdaArn: !GetAtt AWSCURInitializer.Arn BucketName: 'dipeshgtmreport' ReportKey: 'dipeshgtm/myreport/myreport' AWSCURReportStatusTable: Type: 'AWS::Glue::Table' DependsOn: AWSCURDatabase Properties: DatabaseName: athenacurcfn_myreport CatalogId: !Ref AWS::AccountId TableInput: Name: 'cost_and_usage_data_status' TableType: 'EXTERNAL_TABLE' StorageDescriptor: Columns: - Name: status Type: 'string' InputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OutputFormat: 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' SerdeInfo: SerializationLibrary: 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' Location: 's3://dipeshgtmreport/dipeshgtm/myreport/cost_and_usage_data_status/'