import time

import boto3
# Create a Glue crawler over the sales CSV prefix, run it to completion, and
# then switch the resulting table to the OpenCSVSerde SerDe.
client = boto3.client('glue')

CRAWLER_NAME = 'SalesCSVCrawler'
DATABASE_NAME = 'sales-cvs'
# The table the crawler is expected to produce for the S3 prefix below.
TABLE_NAME = 'csv'

# Keys returned by get_table() that are also accepted in update_table()'s
# TableInput. Read-only fields (CreateTime, DatabaseName, VersionId, ...)
# must be stripped or the API rejects the request.
_TABLE_INPUT_KEYS = (
    'Name',
    'Description',
    'Owner',
    'LastAccessTime',
    'LastAnalyzedTime',
    'Retention',
    'StorageDescriptor',
    'PartitionKeys',
    'ViewOriginalText',
    'ViewExpandedText',
    'TableType',
    'Parameters',
    'TargetTable',
)


def _wait_for_crawler(name, poll_seconds=15, timeout_seconds=3600):
    """Block until crawler *name* is back in the READY state.

    start_crawler() only schedules the run, so the catalog table does not
    exist (or is not up to date) until the crawler has finished.

    Args:
        name: Name of the Glue crawler to watch.
        poll_seconds: Delay between successive get_crawler() calls.
        timeout_seconds: Give up after this many seconds.

    Returns:
        The 'Crawler' dict from the final get_crawler() response.

    Raises:
        TimeoutError: If the crawler is not READY within timeout_seconds.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        # Sleep before the first check so the state has time to leave READY
        # after start_crawler() returns.
        time.sleep(poll_seconds)
        crawler = client.get_crawler(Name=name)['Crawler']
        if crawler['State'] == 'READY':
            return crawler
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'Crawler %s still %s after %s seconds'
                % (name, crawler['State'], timeout_seconds)
            )


response = client.create_crawler(
    Name=CRAWLER_NAME,
    Role='AWSGlueServiceRoleDefault',
    DatabaseName=DATABASE_NAME,
    Description='Crawler for generated Sales schema',
    Targets={
        'S3Targets': [
            {
                'Path': 's3://ejlp12-etl-demo-bucket/data/csv',
                'Exclusions': [],
            },
        ]
    },
    SchemaChangePolicy={
        'UpdateBehavior': 'UPDATE_IN_DATABASE',
        'DeleteBehavior': 'DELETE_FROM_DATABASE',
    },
    # Optional, left disabled as in the original script:
    # Configuration='{ "Version": 1.0, "CrawlerOutput": { "Partitions": { "AddOrUpdateBehavior": "InheritFromTable" } } }'
)

response = client.start_crawler(Name=CRAWLER_NAME)

# The crawl runs asynchronously; wait for it before touching the table.
finished_crawler = _wait_for_crawler(CRAWLER_NAME)
if finished_crawler.get('LastCrawl', {}).get('Status') == 'FAILED':
    raise RuntimeError(
        'Crawler %s failed: %s'
        % (CRAWLER_NAME, finished_crawler['LastCrawl'].get('ErrorMessage'))
    )

# update_table() replaces the whole table definition, so start from the
# crawler-generated table and change only the description and the SerDe.
current_table = client.get_table(
    DatabaseName=DATABASE_NAME,
    Name=TABLE_NAME,
)['Table']
table_input = {
    key: current_table[key] for key in _TABLE_INPUT_KEYS if key in current_table
}
table_input['Description'] = 'Table Sales'
storage_descriptor = table_input.setdefault('StorageDescriptor', {})
storage_descriptor['SerdeInfo'] = {
    'Name': 'OpenCSVSerde',
    'SerializationLibrary': 'org.apache.hadoop.hive.serde2.OpenCSVSerde',
    'Parameters': {
        'separatorChar': ',',
    },
}

response = client.update_table(
    DatabaseName=DATABASE_NAME,
    TableInput=table_input,
)
# Source: GitHub gist LucasRBarbosa/d2f5ca9a8ee4de777660ca35a6831755
# AWS Glue: create a crawler, run the crawler, and update the table to use
# "org.apache.hadoop.hive.serde2.OpenCSVSerde".