Skip to content

Instantly share code, notes, and snippets.

View maimoonaiqbal2000's full-sized avatar

Maimoona Iqbal maimoonaiqbal2000

View GitHub Profile
@maimoonaiqbal2000
maimoonaiqbal2000 / handler.py
Created March 5, 2021 05:08 — forked from tomfa/handler.py
Python: AWS Lambda receiving a form file (with serverless.yml)
# This file is your Lambda function
import base64
import json
import boto3
def save_to_bucket(event, context):
AWS_BUCKET_NAME = 'my-bucket-name'
s3 = boto3.resource('s3')
# Create an EMR client with explicitly supplied region and credentials.
import boto3

connection = boto3.client(
    'emr',
    region_name='us-east-1',
    # Placeholder credential values: substitute real ones before running.
    aws_access_key_id='YOUR ACCESS KEY',
    # Keyword corrected to `aws_secret_access_key`; the misspelled
    # `ws_secret_access_key` is rejected by boto3 as an unexpected keyword.
    aws_secret_access_key='YOUR SECRET KEY',
)
# Placeholder: replace with the id of the EMR cluster to work with.
cluster_id = 'give the cluster id'
# Create an EMR client with explicitly supplied region and credentials.
import boto3

connection = boto3.client(
    'emr',
    region_name='us-east-1',
    # Placeholder credential values: substitute real ones before running.
    aws_access_key_id='YOUR ACCESS KEY',
    # Keyword corrected to `aws_secret_access_key`; the misspelled
    # `ws_secret_access_key` is rejected by boto3 as an unexpected keyword.
    aws_secret_access_key='YOUR SECRET KEY',
)
# Placeholder: replace with the id of the EMR cluster to work with.
cluster_id = 'give the cluster id'
@maimoonaiqbal2000
maimoonaiqbal2000 / spark_aws_lambda.py
Created August 16, 2020 23:56 — forked from tomron/spark_aws_lambda.py
Example of Python code that submits a Spark process as an EMR step to an AWS EMR cluster from an AWS Lambda function
import sys
import time
import boto3
def lambda_handler(event, context):
conn = boto3.client("emr")
# chooses the first cluster which is Running or Waiting
# possibly can also choose by name or already have the cluster id
clusters = conn.list_clusters()
import json
import pg8000 as dbapi
from pprint import pprint
def getconnection(database,host,port,user,password):
conn= None
try:
conn=dbapi.connect(database=database,host=host, port=port,\
user=user,password=password,ssl=True)
except Exception as err:
@maimoonaiqbal2000
maimoonaiqbal2000 / etl.py
Created July 13, 2020 21:13 — forked from nitinmlvya/etl.py
ETL Code using AWS Glue.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
args = getResolvedOptions(sys.argv, ['TempDir','JOB_NAME'])
## Initialize the GlueContext and SparkContext
@maimoonaiqbal2000
maimoonaiqbal2000 / aws_glue_avoiding_duplicates.py
Created June 20, 2020 18:04 — forked from michelmilezzi/aws_glue_avoiding_duplicates.py
AWS Glue script showing how to avoid duplicates during a job execution.
import sys
import pydevd
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.dynamicframe import DynamicFrame
from pyspark.sql.functions import col
from awsglue.job import Job