Skip to content

Instantly share code, notes, and snippets.

@arthemus
Created January 7, 2025 19:25
Show Gist options
  • Save arthemus/2802c9b3f93b61d996533894f6433830 to your computer and use it in GitHub Desktop.
Save arthemus/2802c9b3f93b61d996533894f6433830 to your computer and use it in GitHub Desktop.
save_csv
import getpass
# Login name of the account executing this notebook; it is echoed so the
# user can confirm which per-user folder will be written to.
USER = getpass.getuser()
print("User =", USER)

# Root folder under which save_csv() creates one sub-folder per export.
DOWNLOAD_BASE_PATH = "/user/{}/notebooks/downloads".format(USER)
def save_csv(df: DataFrame, file_name: str):
target_file = f"{file_name}"
folder_to_download = f"{DOWNLOAD_BASE_PATH}/{target_file}"
print(f"Working at {folder_to_download}...")
!hdfs dfs -rm -r -f -skipTrash {folder_to_download}/*
(
df
.coalesce(1)
.write
.mode('overwrite')
.options(header='True', delimiter=';')
.csv(folder_to_download)
)
!hdfs dfs -copyToLocal {folder_to_download} {target_file}
!tar cvzf {target_file}.tar.gz {target_file}/*
!rm -rf {target_file}
!hdfs dfs -copyFromLocal {target_file}.tar.gz {folder_to_download}
!hdfs dfs -rm -r -f -skipTrash {folder_to_download}/*.csv
!hdfs dfs -rm -r -f -skipTrash {folder_to_download}/_SUCCESS
!rm {target_file}.tar.gz
!hdfs dfs -ls {folder_to_download}
print(f"{target_file} available to download at {folder_to_download}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment