#!/bin/bash
# Script: GitHubFileDownloader.sh
# Author: electblake, pressdarling
# Version: 1.1
# Description: This script converts one or more GitHub repository file URLs to
#   the GitHub API URL for file contents, checks if the user is logged into the
#   GitHub CLI, and downloads the file.
# Source: https://gist.github.com/electblake/7ef3a63e20b3c8db67d9d66f7021d727
# Credits:
# - Inspired by answers on: https://stackoverflow.com/questions/9159894/download-specific-files-from-github-in-command-line-not-clone-the-entire-repo
# - v1 used "Bash Script" GPT by Widenex for script creation assistance.
#
# MIT License
# Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# Requires: jq, curl, and GitHub CLI (gh) installed and configured.
#
# ```bash
# # assuming macOS, if you're using Linux then it's probably an `apt` job
# brew install jq curl gh
# ```
#
# **Installation:**
# 1. Download this file however you like (Click the download icon on github.com, or https://gist.github.com/ef691eb09202df0b739adec688a72d9a.git...)
# 2. Make script executable: `chmod +x ./GitHubFileDownloader.sh`
# 3. Move or link it into your path, bonus points if you rename to `gh-dl` for easy typing, e.g.:
#
# ```bash
# mv -i $(PWD)/GithubFileDownloader.sh /usr/local/bin/gh-dl
#
# # or for macOS users:
# ln -s $(PWD)/GithubFileDownloader.sh /usr/local/bin/gh-dl
# ```
#
# **Example Usage**
# - You must be logged into the GitHub CLI.
#   If not, the script will initiate the login flow.
#
# ```console
# # GithubFileDownloader.sh https://github.com/github/docs/blob/main/README.md
# File downloaded successfully: README.md
#
# **NEW: Process Multiple Files**
# - If you can get a whole bunch of `https://github.com/[user]/[repo]/blob/[branch]/[path/to/file]` URLs in, say, a text file:
#
# ```bash
# gh-dl $(<urls.txt)
# ```
#
# NOTE(review): debug output is sent to stderr; remove the `>&2` from the `printf` statements to disable this.
#
# TODO: --quiet flag to suppress output, --verbose flag to toggle the >&2 debug log.
#
# Additional Behaviors:
#
# 1. Exit Conditions:
#    - The script will exit if jq or curl are not installed
#    - The script will exit if GitHub CLI authentication fails
#    - The script will exit if no URLs are provided
#
# 2. Authentication:
#    - Uses web-based GitHub authentication flow if not already authenticated
#    - Requires GitHub CLI (gh) to be configured before use
#
# 3. URL Processing:
#    - Only accepts GitHub URLs in the format: https://github.com/[user]/[repo]/blob/[branch]/[path/to/file]
#    - Invalid URL formats will be skipped with an error message
#    - URLs are processed in parallel (default 4 concurrent downloads)
#
# 4. Output Handling:
#    - Debug messages are sent to stderr (>&2)
#    - Download progress is shown for each file
#    - Creates output directory if it doesn't exist
#    - Preserves original filename structure in output directory
#
# 5.
Command Line Options: # --output : Specify output directory (default: current directory) # --parallel : Specify number of parallel downloads (default: 4) # # Example with all options: # ./GitHubFileDownloader.sh --output ./downloads --parallel 8 https://github.com/user/repo/blob/main/file1.txt https://github.com/user/repo/blob/main/file2.txt # # Note: The script uses xargs for parallel processing, which may behave differently # on different Unix-like systems (BSD vs GNU) # # $ cat README.md | head -2 # # GitHub Docs # [![Build GitHub Docs On Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/?repo=github) # # ``` # check_gh_cli_login() { printf "Checking GitHub CLI login status...\n" if ! gh auth status &> /dev/null; then printf "You are not logged into the GitHub CLI. Starting login flow...\n" if ! gh auth login --web; then printf "GitHub CLI login failed.\n" >&2 return 1 fi fi } convert_url_to_api() { local input_url="$1" printf "Converting URL to API format: %s\n" "$input_url" >&2 # Debug to stderr local regex='https://github\.com/([^/]+)/([^/]+)/blob/([^/]+)/(.+)' if [[ $input_url =~ $regex ]]; then local user="${BASH_REMATCH[1]}" local repo="${BASH_REMATCH[2]}" local branch="${BASH_REMATCH[3]}" local path="${BASH_REMATCH[4]}" local generated_api_url="https://api.github.com/repos/$user/$repo/contents/$path?ref=$branch" printf "Generated API URL: %s\n" "$generated_api_url" >&2 # Debug to stderr printf "%s" "$generated_api_url" # Output clean API URL else printf "Invalid URL format: %s\n" "$input_url" >&2 return 1 fi } # Function to download the file using GitHub API URL # download_file_using_api() { # local api_url=$(convert_url_to_api "$url") # ... download_file_using_api() { printf "Downloading file using API URL: %s\n" "$1" local api_url="$1" local original_file_name=$(basename "$2") local output_dir="$3" local output_path="${output_dir}/${original_file_name}" printf "Ensuring dependencies are installed...\n" if ! 
command -v jq &> /dev/null; then printf "Error: 'jq' is required but not installed.\n" >&2 return 1 fi if ! command -v curl &> /dev/null; then printf "Error: 'curl' is required but not installed.\n" >&2 return 1 fi printf "Fetching download URL from GitHub API...\n" local download_url printf "Fetching from GitHub API: %s\n" "$api_url" >&2 # Debugging if ! download_url=$(gh api "$api_url" --jq .download_url 2>&1); then printf "GitHub API request failed: %s\n" "$download_url" >&2 return 1 fi printf "Downloading file: %s\n" "$output_path" if ! curl -sL "$download_url" -o "$output_path"; then printf "Failed to download the file: %s\n" "$output_path" >&2 return 1 fi printf "File downloaded successfully: %s\n" "$output_path" } main() { printf "Starting script with arguments: %s\n" "$*" local output_dir="." local urls=() local -i parallel_jobs=4 # Number of parallel downloads (adjust as needed) while [[ $# -gt 0 ]]; do case "$1" in --output) output_dir="$2" shift 2 ;; --parallel) parallel_jobs="$2" shift 2 ;; *) urls+=("$1") shift ;; esac done if [[ ${#urls[@]} -eq 0 ]]; then printf "Usage: $0 [--output dir] [--parallel N] \n" >&2 return 1 fi printf "Ensuring output directory exists: %s\n" "$output_dir" mkdir -p "$output_dir" printf "Checking GitHub authentication...\n" if ! check_gh_cli_login; then return 1 fi printf "Checking dependencies...\n" if ! command -v jq &> /dev/null || ! 
command -v curl &> /dev/null; then printf "Error: 'jq' and 'curl' are required but not installed.\n" >&2 return 1 fi export -f convert_url_to_api export -f download_file_using_api export output_dir printf "Processing %d files with %d parallel jobs...\n" "${#urls[@]}" "$parallel_jobs" # Use `xargs -n 1` to ensure only one URL is passed per execution printf "%s\n" "${urls[@]}" | xargs -n 1 -P "$parallel_jobs" bash -c ' url="$0" printf "Processing URL: %s\n" "$url" api_url=$(convert_url_to_api "$url") || { printf "Failed to convert URL: %s\n" "$url" >&2; exit 1; } download_file_using_api "$api_url" "$url" "'"$output_dir"'" ' printf "All downloads complete.\n" } main "$@"