Olalekan Fuad Elesin (oelesinsc24)

oelesinsc24 / sagemaker-processing-script.py
Created February 15, 2020 12:23
Example data preparation script to run with SageMaker Processing
import argparse
import os
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)
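The preview cuts off after the imports. A minimal sketch of how such a data preparation script typically continues, assuming the conventional SageMaker Processing container paths and a hypothetical --train-test-split-ratio argument (neither is shown in the gist):

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # hypothetical flag; the original script's arguments are not visible in the preview
    parser.add_argument('--train-test-split-ratio', type=float, default=0.3)
    args, _ = parser.parse_known_args()

    # SageMaker Processing mounts the S3 input under /opt/ml/processing/...
    df = pd.read_csv(os.path.join('/opt/ml/processing/input', 'raw.csv'))
    train, test = train_test_split(df, test_size=args.train_test_split_ratio)

    # files written under /opt/ml/processing/<output-name> are uploaded back to S3
    train.to_csv('/opt/ml/processing/train/train.csv', index=False)
    test.to_csv('/opt/ml/processing/test/test.csv', index=False)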
oelesinsc24 / sagemaker-processing-docker.sh
Created February 13, 2020 16:10
Create SageMaker Processing Docker container
set -eux

echo "Writing to Dockerfile"

cat <<EOF > Dockerfile
FROM python:3.7-slim-buster
RUN pip3 install pandas==0.25.3 scikit-learn==0.21.3
ENV PYTHONUNBUFFERED=TRUE
ENTRYPOINT ["python3"]
EOF
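Once built, such an image is typically tagged and pushed to Amazon ECR; the resulting image URI is what the EcrContainerUri value in the Step Functions gists below would point at.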
oelesinsc24 / get-processing-job-status.py
Created February 12, 2020 16:33
Get Processing Job status with AWS Step Functions Data Science SDK Lambda Step
# We poll the processing job status at intervals
get_processing_job_status = LambdaStep(
    state_id="GetDataProcessingJob",
    parameters={
        "FunctionName": "arn:aws:lambda:eu-west-1:1234567890:function:GetProcessingJobStatus",  # replace with the ARN of the function you created
        "Payload": {
            "JobName": create_processing_job_step.output()['Payload']['JobName']
        }
    }
)
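The status-check gist only shows the Lambda step itself. A minimal sketch of the polling loop around it, assuming the SDK's Wait and Choice states and that the Lambda returns a ProcessingJobStatus field (both assumptions, not shown in the gist):

from stepfunctions.steps import Choice, Fail, Succeed, Wait
from stepfunctions.steps.choice_rule import ChoiceRule

wait_step = Wait(state_id='WaitBeforeNextPoll', seconds=60)
check_status = Choice(state_id='CheckProcessingJobStatus')

check_status.add_choice(
    rule=ChoiceRule.StringEquals(variable='$.Payload.ProcessingJobStatus', value='Completed'),
    next_step=Succeed(state_id='ProcessingJobSucceeded')
)
check_status.add_choice(
    rule=ChoiceRule.StringEquals(variable='$.Payload.ProcessingJobStatus', value='Failed'),
    next_step=Fail(state_id='ProcessingJobFailed')
)
check_status.default_choice(next_step=wait_step)  # still running: wait, then poll again

get_processing_job_status.next(check_status)
wait_step.next(get_processing_job_status)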
oelesinsc24 / create-processing-job-step.py
Created February 12, 2020 16:29
Create Processing Job with AWS Step Functions Data Science SDK Lambda Step
data_processing_configuration = dict(
    JobName=execution_input['JobName'],
    IAMRole=execution_input['IAMRole'],
    LocalStorageSizeGB=50,
    S3CodePath=execution_input['S3CodePath'],
    S3InputDataPath=execution_input['S3InputDataPath'],
    S3OutputDataPath=execution_input['S3OutputDataPath'],
    EcrContainerUri=execution_input['EcrContainerUri']
)
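The preview stops at the configuration dict. A sketch of how it would presumably be handed to the job-creating Lambda as the step payload (the function ARN below is a placeholder):

from stepfunctions.steps import LambdaStep

create_processing_job_step = LambdaStep(
    state_id='CreateDataProcessingJob',
    parameters={
        'FunctionName': 'arn:aws:lambda:eu-west-1:1234567890:function:CreateProcessingJob',  # placeholder ARN
        'Payload': data_processing_configuration
    }
)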
oelesinsc24 / check-processing-job-status.py
Created February 12, 2020 16:26
Lambda function to check SageMaker Processing Job Status
import boto3
import json

sm_client = boto3.client('sagemaker')

def lambda_handler(event, context):
    """
    :param event: expects the processing job name under event['JobName']
    """
    # illustrative completion: the gist preview truncates here
    response = sm_client.describe_processing_job(ProcessingJobName=event['JobName'])
    return {'JobName': event['JobName'], 'ProcessingJobStatus': response['ProcessingJobStatus']}
oelesinsc24 / create-sagemaker-processing-job.py
Created February 12, 2020 16:25
Create SageMaker Processing Job Lambda Function
import boto3

from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

sm_client = boto3.client('sagemaker')

BASE_PROCESSING_IMAGE = ''  # set to the ECR image URI of your processing container
INPUT_DATA_DESTINATION = '/opt/ml/processing/input_data'
PROCESSED_DATA_PATH = '/opt/ml/processing/processed_data'
DEFAULT_VOLUME_SIZE = 100
DEFAULT_INSTANCE_TYPE = 'ml.m5.xlarge'
DEFAULT_INSTANCE_COUNT = 1
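The preview ends at the constants. A minimal sketch of a handler built on them, assuming the payload carries the fields from the data_processing_configuration dict above (the key names are assumptions):

def lambda_handler(event, context):
    processor = ScriptProcessor(
        image_uri=event.get('EcrContainerUri', BASE_PROCESSING_IMAGE),
        command=['python3'],
        role=event['IAMRole'],
        instance_count=DEFAULT_INSTANCE_COUNT,
        instance_type=DEFAULT_INSTANCE_TYPE,
        volume_size_in_gb=event.get('LocalStorageSizeGB', DEFAULT_VOLUME_SIZE)
    )
    processor.run(
        code=event['S3CodePath'],
        inputs=[ProcessingInput(source=event['S3InputDataPath'],
                                destination=INPUT_DATA_DESTINATION)],
        outputs=[ProcessingOutput(source=PROCESSED_DATA_PATH,
                                  destination=event['S3OutputDataPath'])],
        job_name=event['JobName'],
        wait=False  # return immediately; the Step Functions loop polls for status
    )
    return {'JobName': event['JobName']}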
oelesinsc24 / stepfunctions-sdk-chain-steps.py
Created January 28, 2020 20:52
AWS Step Functions SDK Chain Workflow Steps
workflow_definition = Chain([
    data_processing_step,
    training_step,
    model_step,
    transform_step
])

workflow = Workflow(
    name='MyTrainTransformDeployWithGlue_v2',
    definition=workflow_definition,
    role=workflow_execution_role  # IAM role for Step Functions; variable name assumed
)
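A sketch of creating and starting the workflow, with example input values consistent with the other gists on this page:

workflow.create()
execution = workflow.execute(inputs={
    'JobName': 'data-processing-job-001',        # example values
    'S3InputDataPath': 's3://my-bucket/raw/',
    'S3OutputDataPath': 's3://my-bucket/processed/'
})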
oelesinsc24 / stepfunctions-sdk-training-and-model-step.py
Created January 28, 2020 20:49
AWS Step Functions SDK Training and Model Steps
xgb = sagemaker.estimator.Estimator(
    get_image_uri(region, 'xgboost'),
    sagemaker_execution_role,
    train_instance_count=1,
    train_instance_type='ml.m4.4xlarge',
    train_volume_size=5,
    output_path=f's3://{model_bucket}/{prefix}',
    sagemaker_session=session
)
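The preview shows only the estimator. A minimal sketch of the training and model steps the title refers to, assuming the SDK's TrainingStep/ModelStep and execution-input keys that are not visible in the gist:

from stepfunctions.steps import TrainingStep, ModelStep

training_step = TrainingStep(
    state_id='ModelTrainingStep',
    estimator=xgb,
    data={'train': 's3://my-bucket/processed/'},  # example channel: processed-data S3 path
    job_name=execution_input['JobName']           # assumed execution-input key
)

model_step = ModelStep(
    state_id='SaveModelStep',
    model=training_step.get_expected_model(),
    model_name=execution_input['ModelName']       # assumed execution-input key
)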
oelesinsc24 / stepfunctions-sdk-glue-job-step.py
Created January 28, 2020 20:41
AWS Glue Job Step with AWS Step Functions SDK
data_processing_step = GlueStartJobRunStep(
    state_id='GlueDataProcessingStep',
    parameters={
        'JobName': glue_job_name,
        'Arguments': {
            '--s3_input_data_path': execution_input['S3InputDataPath'],
            '--s3_processed_data_path': execution_input['S3OutputDataPath']
        }
    }
)
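Several snippets above reference an execution_input object. A sketch of how it is typically declared with the SDK, using the keys that appear on this page:

from stepfunctions.inputs import ExecutionInput

execution_input = ExecutionInput(schema={
    'JobName': str,
    'IAMRole': str,
    'S3CodePath': str,
    'S3InputDataPath': str,
    'S3OutputDataPath': str,
    'EcrContainerUri': str
})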