"""Create and run an AWS Glue crawler over the sales CSV data, then switch
the resulting catalog table to the OpenCSVSerde.

Steps:
  1. Create the crawler pointing at the S3 prefix holding the CSV files.
  2. Start the crawler and wait until it has finished.
  3. Read the table definition back, replace only its SerDe, and write the
     complete definition back to the Data Catalog.
"""
import time

import boto3

# Names shared by the crawler and by the table update below.
CRAWLER_NAME = 'SalesCSVCrawler'
DATABASE_NAME = 'sales-cvs'
# Table updated by this script; presumably the name the crawler derives from
# the last segment of the S3 path -- verify against the catalog.
TABLE_NAME = 'csv'

# How often to poll the crawler state, and how long to wait in total.
POLL_INTERVAL_SECONDS = 15
CRAWLER_TIMEOUT_SECONDS = 30 * 60

# get_table returns read-only fields (DatabaseName, CreateTime, ...) that
# update_table rejects; only these keys may be copied into TableInput.
TABLE_INPUT_KEYS = (
    'Name',
    'Description',
    'Owner',
    'LastAccessTime',
    'LastAnalyzedTime',
    'Retention',
    'StorageDescriptor',
    'PartitionKeys',
    'ViewOriginalText',
    'ViewExpandedText',
    'TableType',
    'Parameters',
    'TargetTable',
)


def wait_for_crawler(glue, name, poll_interval=POLL_INTERVAL_SECONDS,
                     timeout=CRAWLER_TIMEOUT_SECONDS):
    """Block until crawler *name* is back in the READY state.

    Args:
        glue: A boto3 Glue client.
        name: Name of the crawler to wait for.
        poll_interval: Seconds to sleep between state checks.
        timeout: Maximum number of seconds to wait.

    Returns:
        The ``Crawler`` dict from the final ``get_crawler`` response.

    Raises:
        TimeoutError: If the crawler is not READY within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        crawler = glue.get_crawler(Name=name)['Crawler']
        if crawler['State'] == 'READY':
            return crawler
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'Crawler %r still in state %r after %d seconds'
                % (name, crawler['State'], timeout)
            )
        time.sleep(poll_interval)


client = boto3.client('glue')

response = client.create_crawler(
    Name=CRAWLER_NAME,
    Role='AWSGlueServiceRoleDefault',
    DatabaseName=DATABASE_NAME,
    Description='Crawler for generated Sales schema',
    Targets={
        'S3Targets': [
            {
                'Path': 's3://ejlp12-etl-demo-bucket/data/csv',
                'Exclusions': [],
            },
        ],
    },
    SchemaChangePolicy={
        'UpdateBehavior': 'UPDATE_IN_DATABASE',
        'DeleteBehavior': 'DELETE_FROM_DATABASE',
    },
    # Optional: make partitions inherit their schema from the table.
    # Configuration='{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }',
)

response = client.start_crawler(Name=CRAWLER_NAME)

# start_crawler returns immediately; the table does not exist until the
# crawl has completed, so wait before touching it.
crawler = wait_for_crawler(client, CRAWLER_NAME)
last_crawl = crawler.get('LastCrawl', {})
if last_crawl.get('Status') != 'SUCCEEDED':
    raise RuntimeError(
        'Crawler %r did not succeed: status=%r, error=%r'
        % (CRAWLER_NAME, last_crawl.get('Status'),
           last_crawl.get('ErrorMessage'))
    )

# update_table replaces the table definition with TableInput, so start from
# the definition the crawler produced (columns, location, formats) and
# change only what is needed.
table = client.get_table(DatabaseName=DATABASE_NAME, Name=TABLE_NAME)['Table']
table_input = {key: table[key] for key in TABLE_INPUT_KEYS if key in table}
table_input['Description'] = 'Table Sales'
storage_descriptor = table_input.setdefault('StorageDescriptor', {})
storage_descriptor['SerdeInfo'] = {
    'Name': 'OpenCSVSerde',
    'SerializationLibrary': 'org.apache.hadoop.hive.serde2.OpenCSVSerde',
    'Parameters': {
        'separatorChar': ',',
    },
}

response = client.update_table(
    DatabaseName=DATABASE_NAME,
    TableInput=table_input,
)
-
-
Save NulledExceptions/26147bf12855e5a7de86dd081d2cad06 to your computer and use it in GitHub Desktop.
AWS Glue Create Crawler, Run Crawler and update Table to use "org.apache.hadoop.hive.serde2.OpenCSVSerde"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment