import time

import boto3
# Create a Glue crawler over the sales CSV data, run it to completion, then
# switch the crawled table to the OpenCSVSerde without discarding the schema
# the crawler inferred.

CRAWLER_NAME = 'SalesCSVCrawler'
DATABASE_NAME = 'sales-cvs'
TABLE_NAME = 'csv'

# Top-level keys of a get_table() result that update_table() accepts inside
# TableInput. get_table() also returns read-only fields (DatabaseName,
# CreateTime, UpdateTime, CreatedBy, ...) that are rejected if sent back.
_TABLE_INPUT_KEYS = (
    'Name',
    'Description',
    'Owner',
    'LastAccessTime',
    'LastAnalyzedTime',
    'Retention',
    'StorageDescriptor',
    'PartitionKeys',
    'ViewOriginalText',
    'ViewExpandedText',
    'TableType',
    'Parameters',
    'TargetTable',
)


def _wait_for_crawler(glue, name, poll_seconds=15, timeout_seconds=3600):
    """Block until crawler *name* is back in the READY state.

    Args:
        glue: A boto3 Glue client.
        name: Name of the crawler to watch.
        poll_seconds: Delay between get_crawler() calls.
        timeout_seconds: Give up after this many seconds.

    Returns:
        The 'Crawler' dict from the final get_crawler() response.

    Raises:
        TimeoutError: If the crawler is not READY within *timeout_seconds*.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        crawler = glue.get_crawler(Name=name)['Crawler']
        if crawler['State'] == 'READY':
            return crawler
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'Crawler %r still in state %r after %d seconds'
                % (name, crawler['State'], timeout_seconds)
            )
        time.sleep(poll_seconds)


client = boto3.client('glue')

response = client.create_crawler(
    Name=CRAWLER_NAME,
    Role='AWSGlueServiceRoleDefault',
    DatabaseName=DATABASE_NAME,
    Description='Crawler for generated Sales schema',
    Targets={
        'S3Targets': [
            {
                'Path': 's3://ejlp12-etl-demo-bucket/data/csv',
                'Exclusions': [],
            },
        ],
    },
    SchemaChangePolicy={
        'UpdateBehavior': 'UPDATE_IN_DATABASE',
        'DeleteBehavior': 'DELETE_FROM_DATABASE',
    },
    # Optional, left disabled as in the original script:
    # Configuration='{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }',
)

response = client.start_crawler(Name=CRAWLER_NAME)

# start_crawler() only kicks off the run. The table does not exist (or is
# still being rewritten) until the crawler finishes, so wait before touching
# it.
crawler = _wait_for_crawler(client, CRAWLER_NAME)
last_crawl = crawler.get('LastCrawl', {})
if last_crawl.get('Status') != 'SUCCEEDED':
    raise RuntimeError(
        'Crawler %r did not succeed: status=%r, error=%r'
        % (CRAWLER_NAME, last_crawl.get('Status'), last_crawl.get('ErrorMessage'))
    )

# update_table() replaces the table definition with TableInput, so start from
# the crawler-generated definition and override only what needs to change.
# Sending a StorageDescriptor holding just SerdeInfo would drop the columns,
# location and input/output formats.
table = client.get_table(DatabaseName=DATABASE_NAME, Name=TABLE_NAME)['Table']
table_input = {key: table[key] for key in _TABLE_INPUT_KEYS if key in table}
table_input['Description'] = 'Table Sales'
storage_descriptor = table_input.setdefault('StorageDescriptor', {})
storage_descriptor['SerdeInfo'] = {
    'Name': 'OpenCSVSerde',
    'SerializationLibrary': 'org.apache.hadoop.hive.serde2.OpenCSVSerde',
    'Parameters': {
        'separatorChar': ',',
    },
}

response = client.update_table(
    DatabaseName=DATABASE_NAME,
    TableInput=table_input,
)
-
-
Save IrishBird/b3c01aad8de3bf2b470f34feff80959d to your computer and use it in GitHub Desktop.
AWS Glue: create a crawler, run the crawler, and update the table to use "org.apache.hadoop.hive.serde2.OpenCSVSerde"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment