@mlapida
Last active October 3, 2019 01:01
Revisions

  1. Michael Lapidakis renamed this gist Jan 29, 2016. 1 changed file with 0 additions and 0 deletions.
  2. Michael Lapidakis revised this gist Jan 29, 2016. No changes.
  3. Michael Lapidakis revised this gist Dec 8, 2015. No changes.
  4. Michael Lapidakis revised this gist Dec 8, 2015. 1 changed file with 20 additions and 5 deletions. (This is the last revision that changed the file; testing, error-handling, and porting sketches follow the revision list below.)
    25 changes: 20 additions & 5 deletions CloudWatchLogsLambdaKinesisFirehose.py
    @@ -10,19 +10,32 @@
      client = boto3.client('firehose')

      def lambda_handler(event, context):

    +     #capture the CloudWatch log data
          outEvent = str(event['awslogs']['data'])

    +     #decode and unzip the log data
          outEvent = gzip.GzipFile(fileobj=StringIO(outEvent.decode('base64','strict'))).read()

    +     #convert the log data from JSON into a dictionary
          cleanEvent = json.loads(outEvent)

    +     #initiate a list
          s = []
    -     firehoseName = 'FloLogTestKF'

    +     #set the name of the Kinesis Firehose Stream
    +     firehoseName = 'FlowLogTest'

    +     #loop through the events line by line
          for t in cleanEvent['logEvents']:

    +         #Transform the data and store it in the "Data" field.
              p={
    -             #Fields in FlowLogs [version, accountid, interfaceid, srcaddr, dstaddr, srcport, dstport, protocol, packets, bytes, start, stop, action, logstatus]
    -             #CEF:0|security|flowlogs|1.0|src= | spt= | dst= | dpt= | start= | end= | proto= | out=
    -             'Data': "CEF:0|AWS CloudWatch|FlowLogs|1.0|src=" + str(t['extractedFields']['srcaddr']) + "|spt=" + str(t['extractedFields']['srcport']) + "|dst=" + str(t['extractedFields']['dstaddr']) + "|dpt=" + str(t['extractedFields']['dstport'])+ "|proto=" + str(t['extractedFields']['protocol'])+ "|start=" + str(t['extractedFields']['start'])+ "|end=" + str(t['extractedFields']['end'])+ "|out=" + str(t['extractedFields']['bytes'])+"\n"
    +             #Fields in FlowLogs - [version, accountid, interfaceid, srcaddr, dstaddr, srcport, dstport, protocol, packets, bytes, start, stop, action, logstatus]
    +             'Data': str(t['extractedFields']['start']) + "," + str(t['extractedFields']['dstaddr']) + "," + str(t['extractedFields']['srcaddr']) + "," + str(t['extractedFields']['packets'])+"\n"
              }

    +         #write the data to our list
              s.insert(len(s),p)

              #limit of 500 records per batch. Break it up if you have to.

    @@ -33,7 +46,7 @@ def lambda_handler(event, context):
                  #Empty the list
                  s = []

    -     #send the response to Firehose in bulk
    +     #when done, send the response to Firehose in bulk
          if len(s) > 0:
              SendToFireHose(firehoseName, s)

    @@ -43,4 +56,6 @@ def SendToFireHose(streamName, records):
              DeliveryStreamName = streamName,
              Records=records
          )

    +     #log the number of data points written to Kinesis
    +     print "Wrote the following records to Firehose: " + str(len(records))
  5. Michael Lapidakis revised this gist Dec 8, 2015. 1 changed file with 7 additions and 21 deletions.
    28 changes: 7 additions & 21 deletions CloudWatchLogsLambdaKinesisFirehose.py
    @@ -10,32 +10,19 @@
      client = boto3.client('firehose')

      def lambda_handler(event, context):

    -     #capture the CloudWatch log data
          outEvent = str(event['awslogs']['data'])

    -     #decode and unzip the log data
          outEvent = gzip.GzipFile(fileobj=StringIO(outEvent.decode('base64','strict'))).read()

    -     #convert the log data from JSON into a dictionary
          cleanEvent = json.loads(outEvent)

    -     #initiate a list
          s = []
    +     firehoseName = 'FloLogTestKF'

    -     #set the name of the Kinesis Firehose Stream
    -     firehoseName = 'FlowLogTest'

    -     #loop through the events line by line
          for t in cleanEvent['logEvents']:

    -         #Transform the data and store it in the "Data" field.
              p={
    -             #Fields in FlowLogs - [version, accountid, interfaceid, srcaddr, dstaddr, srcport, dstport, protocol, packets, bytes, start, stop, action, logstatus]
    -             'Data': str(t['extractedFields']['start']) + "," + str(t['extractedFields']['dstaddr']) + "," + str(t['extractedFields']['srcaddr']) + "," + str(t['extractedFields']['packets'])+"\n"
    +             #Fields in FlowLogs [version, accountid, interfaceid, srcaddr, dstaddr, srcport, dstport, protocol, packets, bytes, start, stop, action, logstatus]
    +             #CEF:0|security|flowlogs|1.0|src= | spt= | dst= | dpt= | start= | end= | proto= | out=
    +             'Data': "CEF:0|AWS CloudWatch|FlowLogs|1.0|src=" + str(t['extractedFields']['srcaddr']) + "|spt=" + str(t['extractedFields']['srcport']) + "|dst=" + str(t['extractedFields']['dstaddr']) + "|dpt=" + str(t['extractedFields']['dstport'])+ "|proto=" + str(t['extractedFields']['protocol'])+ "|start=" + str(t['extractedFields']['start'])+ "|end=" + str(t['extractedFields']['end'])+ "|out=" + str(t['extractedFields']['bytes'])+"\n"
              }

    -         #write the data to our list
              s.insert(len(s),p)

              #limit of 500 records per batch. Break it up if you have to.

    @@ -46,15 +33,14 @@ def lambda_handler(event, context):
                  #Empty the list
                  s = []

    -     #when done, send the response to Firehose in bulk
    -     SendToFireHose(firehoseName, s)
    +     #send the response to Firehose in bulk
    +     if len(s) > 0:
    +         SendToFireHose(firehoseName, s)

      #function to send record to Kinesis Firehose
      def SendToFireHose(streamName, records):
          response = client.put_record_batch(
              DeliveryStreamName = streamName,
              Records=records
          )

    -     #log the number of data points written to Kinesis
    -     print "Wrote the following records to Firehose: " + str(len(records))
  6. Michael Lapidakis revised this gist Dec 7, 2015. 1 changed file with 30 additions and 12 deletions.
    42 changes: 30 additions & 12 deletions CloudWatchLogsLambdaKinesisFirehose.py
    @@ -10,33 +10,51 @@
      client = boto3.client('firehose')

      def lambda_handler(event, context):

    +     #capture the CloudWatch log data
          outEvent = str(event['awslogs']['data'])
    -     #decode and unpack the logs recieved from CloudWatch

    +     #decode and unzip the log data
          outEvent = gzip.GzipFile(fileobj=StringIO(outEvent.decode('base64','strict'))).read()
    -     #convert the logs to json

    +     #convert the log data from JSON into a dictionary
          cleanEvent = json.loads(outEvent)

    +     #initiate a list
          s = []

    +     #set the name of the Kinesis Firehose Stream
    +     firehoseName = 'FlowLogTest'

    +     #loop through the events line by line
          for t in cleanEvent['logEvents']:
    -         #load the results into a list

    +         #Transform the data and store it in the "Data" field.
              p={
    +             #Fields in FlowLogs - [version, accountid, interfaceid, srcaddr, dstaddr, srcport, dstport, protocol, packets, bytes, start, stop, action, logstatus]
                  'Data': str(t['extractedFields']['start']) + "," + str(t['extractedFields']['dstaddr']) + "," + str(t['extractedFields']['srcaddr']) + "," + str(t['extractedFields']['packets'])+"\n"
              }

    +         #write the data to our list
              s.insert(len(s),p)

              #limit of 500 records per batch. Break it up if you have to.
              if len(s) > 499:
                  #send the response to Firehose in bulk
    -             response = client.put_record_batch(
    -                 DeliveryStreamName='FlowLogTest',
    -                 Records=s
    -             )
    +             SendToFireHose(firehoseName, s)

    -             #Empty the list if needed
    +             #Empty the list
                  s = []

    -     #send the response to Firehose in bulk
    +     #when done, send the response to Firehose in bulk
    +     SendToFireHose(firehoseName, s)

    + #function to send record to Kinesis Firehose
    + def SendToFireHose(streamName, records):
          response = client.put_record_batch(
    -         DeliveryStreamName='FlowLogTest',
    -         Records=s
    -     )
    +         DeliveryStreamName = streamName,
    +         Records=records
    +     )

    +     #log the number of data points written to Kinesis
    +     print "Wrote the following records to Firehose: " + str(len(records))
  7. Michael Lapidakis created this gist Dec 4, 2015.
    42 changes: 42 additions & 0 deletions CloudWatchLogsLambdaKinesisFirehose.py
    @@ -0,0 +1,42 @@
    import boto3
    import logging
    import json
    import gzip
    from StringIO import StringIO

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    client = boto3.client('firehose')

    def lambda_handler(event, context):
        outEvent = str(event['awslogs']['data'])
        #decode and unpack the logs recieved from CloudWatch
        outEvent = gzip.GzipFile(fileobj=StringIO(outEvent.decode('base64','strict'))).read()
        #convert the logs to json
        cleanEvent = json.loads(outEvent)
        s = []

        for t in cleanEvent['logEvents']:
            #load the results into a list
            p={
                'Data': str(t['extractedFields']['start']) + "," + str(t['extractedFields']['dstaddr']) + "," + str(t['extractedFields']['srcaddr']) + "," + str(t['extractedFields']['packets'])+"\n"
            }
            s.insert(len(s),p)

            #limit of 500 records per batch. Break it up if you have to.
            if len(s) > 499:
                #send the response to Firehose in bulk
                response = client.put_record_batch(
                    DeliveryStreamName='FlowLogTest',
                    Records=s
                )

                #Empty the list if needed
                s = []

        #send the response to Firehose in bulk
        response = client.put_record_batch(
            DeliveryStreamName='FlowLogTest',
            Records=s
        )
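
A few notes on the final version of the script (revision 4 above), starting with how to exercise it locally. The handler expects the event shape CloudWatch Logs delivers to a subscribed Lambda function: a base64-encoded, gzipped JSON document under event['awslogs']['data'] whose logEvents entries carry extractedFields. The snippet below is a minimal Python 2 test harness, not part of the gist; the field values are invented, and it assumes lambda_handler from the latest revision is already defined in the session and that firehoseName points at a delivery stream you actually own, since the handler will call put_record_batch for real.

    import base64
    import gzip
    import json
    from StringIO import StringIO

    #build a fake CloudWatch Logs payload with a single flow-log record (sample values only)
    payload = {
        "logEvents": [
            {"extractedFields": {
                "version": "2", "accountid": "123456789012", "interfaceid": "eni-abc12345",
                "srcaddr": "10.0.0.1", "dstaddr": "10.0.0.2",
                "srcport": "443", "dstport": "49152", "protocol": "6",
                "packets": "10", "bytes": "840",
                "start": "1449533400", "end": "1449533460",
                "action": "ACCEPT", "logstatus": "OK"}}
        ]
    }

    #gzip then base64-encode the JSON, mirroring what the handler reverses on the way in
    buf = StringIO()
    gz = gzip.GzipFile(fileobj=buf, mode='wb')
    gz.write(json.dumps(payload))
    gz.close()
    event = {"awslogs": {"data": base64.b64encode(buf.getvalue())}}

    #invoke the handler the way Lambda would; the context argument is unused by the gist
    lambda_handler(event, None)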
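
Second, SendToFireHose sends the batch and only prints the record count. In boto3, put_record_batch can partially fail without raising an exception: the response carries FailedPutCount and a RequestResponses list in which failed entries include an ErrorCode. If delivery matters, a retry wrapper along the lines below is one option; it is a sketch, not part of the gist, and it reuses the module-level client and logger. (The if len(s) > 0 guard added on Dec 8 helps for a related reason: PutRecordBatch rejects an empty record list.)

    import time

    def SendToFireHoseWithRetry(streamName, records, attempts=3):
        #PutRecordBatch takes at most 500 records and may reject individual records
        while records and attempts > 0:
            response = client.put_record_batch(
                DeliveryStreamName=streamName,
                Records=records
            )
            if response['FailedPutCount'] == 0:
                logger.info("Wrote %d records to Firehose" % len(records))
                return
            #keep only the records whose responses carry an ErrorCode, then try again
            records = [rec for rec, res in zip(records, response['RequestResponses'])
                       if 'ErrorCode' in res]
            attempts -= 1
            time.sleep(1)
        if records:
            logger.error("Gave up on %d records" % len(records))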
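
Finally, the gist targets the Python 2.7 Lambda runtime (StringIO, str.decode('base64'), print statements), which AWS has since retired. A rough sketch of the same decode-and-transform flow on a Python 3 runtime, assuming the same event shape, CSV record layout, and stream name, might look like this:

    import base64
    import gzip
    import json

    import boto3

    client = boto3.client('firehose')
    firehoseName = 'FlowLogTest'  #same stream name the gist uses; substitute your own

    def lambda_handler(event, context):
        #base64-decode and gunzip the CloudWatch Logs payload, then parse the JSON
        raw = base64.b64decode(event['awslogs']['data'])
        cleanEvent = json.loads(gzip.decompress(raw))

        records = []
        for t in cleanEvent['logEvents']:
            f = t['extractedFields']
            #same CSV-style record the gist builds: start, dstaddr, srcaddr, packets
            records.append({'Data': "{0},{1},{2},{3}\n".format(
                f['start'], f['dstaddr'], f['srcaddr'], f['packets'])})

            #flush every 500 records to stay within the PutRecordBatch limit
            if len(records) == 500:
                client.put_record_batch(DeliveryStreamName=firehoseName, Records=records)
                records = []

        if records:
            client.put_record_batch(DeliveryStreamName=firehoseName, Records=records)

As with the original, flushing every 500 records keeps each call within the PutRecordBatch batch limit the gist's own comment points out.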