# Overview:
# Install cloud_sql_proxy binary with appropriate credentials on all driver and worker nodes.
# References:
# https://cloud.google.com/sql/docs/postgres/connect-external-app#proxy
# https://cloud.google.com/sql/docs/postgres/sql-proxy#authentication-options

# Step 1.
# Follow https://cloud.google.com/sql/docs/postgres/connect-external-app#proxy to set up:
# - enable Cloud SQL API
# - service account credentials, with "Cloud SQL Client" role. download the .json credential file
# - find your instance connection name from the Cloud SQL Instance details page

# Step 2.
# Set up a Databricks init script: it downloads the Cloud SQL proxy binary and starts
# it in the background on every driver/worker node when the cluster boots, so the
# database becomes reachable on localhost from every executor.
dbutils.fs.put("/databricks/init/cluster_name/gcloud.sh", """
#!/bin/bash
wget -q https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 -O cloud_sql_proxy
chmod +x cloud_sql_proxy
./cloud_sql_proxy -instances=_your_cloud_sql_instance_name_=tcp:3306 -credential_file=/path/to/gcloud_creds.json 2>&1 &
echo "finished installing gcloud postgres proxy"
""")
# Run the above code in notebook, and (re)start cluster to pick-up the init script results

# Step 3.
# now in a notebook attached to the cluster initialized as above:
jdbcHostname = "127.0.0.1"  # talk to local proxy
jdbcDatabase = "database_name"
jdbcPort = 3306  # this should line up with the tcp port number specified in init script
username = "db_username"
password = "db_password"

# this example shows postgresql, I'd imagine mySQL to be quite similar.
# NOTE(review): credentials are deliberately NOT embedded in the URL -- a URL with
# ?user=...&password=... tends to leak into logs and Spark UIs that echo the
# connection string. They are passed as separate JDBC options below instead.
jdbcUrl = "jdbc:postgresql://{0}:{1}/{2}".format(jdbcHostname, jdbcPort, jdbcDatabase)

tableName = '_your_table_name_'

# Load the table through the local proxy; bind the result so it can be reused.
df = (spark.read
      .format("jdbc")
      .option("driver", "org.postgresql.Driver")
      .option("url", jdbcUrl)
      .option("user", username)
      .option("password", password)
      .option("dbtable", tableName)
      .load())
df  # last expression in the cell -> notebook displays the DataFrame, as before