Skip to content

Instantly share code, notes, and snippets.

@mikeapted
Last active October 3, 2018 14:43
Show Gist options
  • Select an option

  • Save mikeapted/f7c8fcf7c3565861ac3b9c6a2b2140f3 to your computer and use it in GitHub Desktop.

Select an option

Save mikeapted/f7c8fcf7c3565861ac3b9c6a2b2140f3 to your computer and use it in GitHub Desktop.

Revisions

  1. mikeapted renamed this gist Oct 3, 2018. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. mikeapted renamed this gist Oct 3, 2018. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. mikeapted created this gist Oct 3, 2018.
    31 changes: 31 additions & 0 deletions lab 3 step 18
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,31 @@
    import sys
    from awsglue.transforms import *
    from awsglue.utils import getResolvedOptions
    from pyspark.context import SparkContext
    from awsglue.context import GlueContext
    from awsglue.job import Job

    # Glue ETL job: read a catalog table, keep/rename the sensor columns, and
    # write the result to S3 as CSV.

    ## @params: [JOB_NAME]
    # Only JOB_NAME is resolved from the command-line arguments.
    args = getResolvedOptions(sys.argv, ['JOB_NAME'])

    # Create the Spark and Glue contexts, then initialise the job with the
    # resolved name and the full argument dict.
    sc = SparkContext()
    glueContext = GlueContext(sc)
    spark = glueContext.spark_session
    job = Job(glueContext)
    job.init(args['JOB_NAME'], args)

    # Source: table "raw2018" in catalog database "YourInitial_bigdata".
    ## @type: DataSource
    ## @args: [database = "YourInitial_bigdata", table_name = "raw2018", transformation_ctx = "datasource0"]
    ## @return: datasource0
    ## @inputs: []
    datasource0 = glueContext.create_dynamic_frame.from_catalog(
        database="YourInitial_bigdata",
        table_name="raw2018",
        transformation_ctx="datasource0",
    )

    # Each tuple is (source field, source type, target field, target type).
    # "sensortype" is emitted twice: once under its own name and once as
    # "sensortype1".
    # NOTE(review): the @args annotation below lists only two mappings and does
    # not mention "sensortype1" -- confirm whether the annotation should be
    # brought in line with the call.
    sensor_mappings = [
        ("sensortype", "int", "sensortype", "int"),
        ("sensortype", "int", "sensortype1", "int"),
        ("sensorvalue", "int", "sensorvalue", "int"),
    ]
    ## @type: ApplyMapping
    ## @args: [mapping = [("sensortype", "int", "sensortype", "int"), ("sensorvalue", "int", "sensorvalue", "int")], transformation_ctx = "applymapping1"]
    ## @return: applymapping1
    ## @inputs: [frame = datasource0]
    applymapping1 = ApplyMapping.apply(
        frame=datasource0,
        mappings=sensor_mappings,
        transformation_ctx="applymapping1",
    )

    # Sink: CSV files under the S3 prefix below, with writeHeader set to False.
    # NOTE(review): the @args annotation below omits format_options -- confirm
    # whether the annotation should be brought in line with the call.
    sink_options = {"path": "s3://YourInitial-bigdata-bucket/ml/trainingdata"}
    ## @type: DataSink
    ## @args: [connection_type = "s3", connection_options = {"path": "s3://YourInitial-bigdata-bucket/ml/trainingdata"}, format = "csv", transformation_ctx = "datasink2"]
    ## @return: datasink2
    ## @inputs: [frame = applymapping1]
    datasink2 = glueContext.write_dynamic_frame.from_options(
        frame=applymapping1,
        connection_type="s3",
        connection_options=sink_options,
        format="csv",
        format_options={"writeHeader": False},
        transformation_ctx="datasink2",
    )

    job.commit()