Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save tommydangerous/336d9ec1e519e9f3632511cb4c7e4a6e to your computer and use it in GitHub Desktop.
Save tommydangerous/336d9ec1e519e9f3632511cb4c7e4a6e to your computer and use it in GitHub Desktop.

Revisions

  1. tommydangerous revised this gist May 11, 2023. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion mage_delta-lake_upload_multiple_battle-history.py
    Original file line number Diff line number Diff line change
    @@ -12,7 +12,7 @@
    # Write to Delta Lake for each planet and keep appending the data
    write_deltalake(
    # Change this URI to your own unique URI
    's3://mage-demo-public/battle-history/1337',
    's3://mage-demo-public/battle-history-versioned/1337',
    data=planet_df,
    mode='append',
    storage_options={
  2. tommydangerous revised this gist May 11, 2023. 1 changed file with 5 additions and 2 deletions.
    7 changes: 5 additions & 2 deletions mage_delta-lake_upload_multiple_battle-history.py
    Original file line number Diff line number Diff line change
    @@ -7,18 +7,21 @@
    # Loop through each planet
    for planet in planets:
    # Select a subset of the battle history data for a single planet
    df_planet = df[df['planet'] == planet]
    planet_df = df.query(f"`planet` == '{planet}'")

    # Write to Delta Lake for each planet and keep appending the data
    write_deltalake(
    # Change this URI to your own unique URI
    's3://mage-demo-public/battle-history/1337',
    data=df_planet,
    data=planet_df,
    mode='append',
    storage_options={
    'AWS_REGION': '...',
    'AWS_ACCESS_KEY_ID': '...',
    'AWS_SECRET_ACCESS_KEY': '...',
    'AWS_S3_ALLOW_UNSAFE_RENAME': 'true',
    },
    )
    print(
    f'Created table with {len(planet_df.index)} records for planet {planet}.',
    )
  3. tommydangerous created this gist May 11, 2023.
    24 changes: 24 additions & 0 deletions mage_delta-lake_upload_multiple_battle-history.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,24 @@
    from deltalake.writer import write_deltalake


    # Distinct planet names in sorted order, e.g.
    # ['Aiur', 'Eos', 'Gaia', 'Kamigawa', 'Korhal', 'Ravnica']
    # sorted() already returns a list, so no extra list() wrapper is needed.
    planets = sorted(set(df['planet'].values))

    # Change this URI to your own unique URI
    table_uri = 's3://mage-demo-public/battle-history/1337'

    # The connection settings are the same for every write, so build them
    # once here instead of once per planet inside the loop.
    storage_options = {
        'AWS_REGION': '...',
        'AWS_ACCESS_KEY_ID': '...',
        'AWS_SECRET_ACCESS_KEY': '...',
        'AWS_S3_ALLOW_UNSAFE_RENAME': 'true',
    }

    # Loop through each planet
    for planet in planets:
        # Select a subset of the battle history data for a single planet
        df_planet = df[df['planet'] == planet]

        # Write to Delta Lake for each planet and keep appending the data:
        # every iteration targets the same table URI with mode='append'.
        write_deltalake(
            table_uri,
            data=df_planet,
            mode='append',
            storage_options=storage_options,
        )