import time

import boto3
# Create an AWS Glue crawler over the sales CSV data, run it to completion,
# then switch the resulting table to the OpenCSV SerDe.

# Identifiers shared by the Glue calls below.
DATABASE_NAME = 'sales-cvs'
CRAWLER_NAME = 'SalesCSVCrawler'
# Table updated after the crawl; presumably the crawler names it after the
# last segment of the S3 path -- TODO confirm against the catalog.
TABLE_NAME = 'csv'
# Seconds to sleep between crawler state checks.
CRAWLER_POLL_SECONDS = 10

# get_table() returns read-only fields (DatabaseName, CreateTime, ...) that
# update_table() does not accept in TableInput, so only these keys are copied.
_TABLE_INPUT_KEYS = (
    'Name',
    'Description',
    'Owner',
    'LastAccessTime',
    'LastAnalyzedTime',
    'Retention',
    'StorageDescriptor',
    'PartitionKeys',
    'ViewOriginalText',
    'ViewExpandedText',
    'TableType',
    'Parameters',
    'TargetTable',
)

# SerDe definition applied to the crawled table.
OPEN_CSV_SERDE = {
    'Name': 'OpenCSVSerde',
    'SerializationLibrary': 'org.apache.hadoop.hive.serde2.OpenCSVSerde',
    'Parameters': {
        'separatorChar': ','
    }
}


def _wait_for_crawler(glue, name, poll_seconds=CRAWLER_POLL_SECONDS):
    """Block until crawler *name* is back in the READY state.

    start_crawler() only kicks off the run, so the table it produces does not
    exist until the crawler has finished.

    Args:
        glue: boto3 Glue client.
        name: Name of the crawler to wait for.
        poll_seconds: Delay between successive get_crawler() calls.

    Returns:
        The ``Crawler`` dict from the final get_crawler() response.

    Raises:
        RuntimeError: If the last crawl is reported as FAILED or CANCELLED.
    """
    while True:
        crawler = glue.get_crawler(Name=name)['Crawler']
        if crawler['State'] == 'READY':
            break
        time.sleep(poll_seconds)

    status = crawler.get('LastCrawl', {}).get('Status')
    if status in ('FAILED', 'CANCELLED'):
        raise RuntimeError(
            'Crawler %s did not complete successfully: %s' % (name, status)
        )
    return crawler


def _table_input_with_serde(table, serde_info, description):
    """Build an update_table() TableInput that keeps the crawled schema.

    update_table() replaces the table definition with TableInput, so the
    existing definition is copied and only the SerDe and description change.

    Args:
        table: The ``Table`` dict from a get_table() response.
        serde_info: SerdeInfo dict to install in the storage descriptor.
        description: New table description.

    Returns:
        A dict suitable for update_table(TableInput=...).
    """
    table_input = {key: table[key] for key in _TABLE_INPUT_KEYS if key in table}
    storage = dict(table_input.get('StorageDescriptor', {}))
    storage['SerdeInfo'] = serde_info
    table_input['StorageDescriptor'] = storage
    table_input['Description'] = description
    return table_input


client = boto3.client('glue')

response = client.create_crawler(
    Name=CRAWLER_NAME,
    Role='AWSGlueServiceRoleDefault',
    DatabaseName=DATABASE_NAME,
    Description='Crawler for generated Sales schema',
    Targets={
        'S3Targets': [
            {
                'Path': 's3://ejlp12-etl-demo-bucket/data/csv',
                'Exclusions': [
                ]
            },
        ]
    },
    SchemaChangePolicy={
        'UpdateBehavior': 'UPDATE_IN_DATABASE',
        'DeleteBehavior': 'DELETE_FROM_DATABASE'
    }
    # Optional crawler configuration, left disabled by the original author:
    # ,Configuration='{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }'
)

response = client.start_crawler(
    Name=CRAWLER_NAME
)

# The crawl runs asynchronously; wait for it so the table exists before it is
# read and updated.
_wait_for_crawler(client, CRAWLER_NAME)

# Start from the crawled definition so columns, location and formats survive
# the update.
crawled_table = client.get_table(
    DatabaseName=DATABASE_NAME,
    Name=TABLE_NAME
)['Table']

response = client.update_table(
    DatabaseName=DATABASE_NAME,
    TableInput=_table_input_with_serde(
        crawled_table,
        OPEN_CSV_SERDE,
        'Table Sales'
    )
)
# Source: GitHub gist vaquarkhan/8f9c6bde1c5e0ed1d1ac4108f72bda5d
# AWS Glue: create a crawler, run the crawler, and update the table to use
# "org.apache.hadoop.hive.serde2.OpenCSVSerde".