Skip to content

Instantly share code, notes, and snippets.

@codepuncher
Last active March 26, 2021 15:07
Show Gist options
  • Save codepuncher/b3de0a0a78fc0461ba087b01fdfc9671 to your computer and use it in GitHub Desktop.
Save codepuncher/b3de0a0a78fc0461ba087b01fdfc9671 to your computer and use it in GitHub Desktop.

Revisions

  1. codepuncher revised this gist Mar 26, 2021. No changes.
  2. codepuncher created this gist Mar 26, 2021.
    175 changes: 175 additions & 0 deletions fix_post_urls.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,175 @@
    #!/usr/bin/env bash
    #
    # Interactive helper for a WordPress multisite network: for each selected
    # site it builds a CSV of old -> new post URLs, and optionally changes the
    # permalink structure, runs a search-replace, imports redirects and checks
    # the resulting redirects with curl.
    #
    # Requires: bash 4+ (associative arrays), WP-CLI (`wp`), curl.

    # set -x # debug mode - traces every command as it is executed

    # Resolve the full path to the wp program. `command -v` is a shell builtin,
    # unlike `which`, and reports failure through its exit status.
    if ! wp=$(command -v wp); then
      echo 'Error: wp (WP-CLI) was not found in PATH.' >&2
      exit 1
    fi

    # Resolve the full path to the curl program. curl is only used by the
    # optional redirect validation step, so a missing binary is just a warning.
    if ! curl=$(command -v curl); then
      echo 'Warning: curl was not found in PATH; redirect validation will not work.' >&2
    fi

    # Example path for remote server (e.g. Kinsta).
    path="$HOME/public/current/web/wp"

    echo "Begin fixing Post URLs $(date)"

    # Comma-separated IDs of the sites to process: 3 through 9.
    # Sites 1 and 2 are deliberately left out because they are irrelevant here.
    printf -v site_ids '%s,' {3..9}
    site_ids=${site_ids%,}

    # One domain per line for every selected site. Abort when the lookup fails
    # instead of carrying on with an empty list.
    if ! site_urls=$("$wp" site list --field=domain --site__in="$site_ids" --path="$path"); then
      echo 'Error: could not list the sites of the network.' >&2
      exit 1
    fi

    # Associative array (old post URL -> new post URL) used for the search-replace.
    declare -A SR_URLS

    # Print post_url with the first literal occurrence of old_prefix replaced by
    # new_prefix. The prefix is matched as plain text (no glob interpretation).
    # Prints post_url unchanged when old_prefix is empty or does not occur.
    # Arguments: $1 - post URL, $2 - old prefix, $3 - new prefix
    rewrite_post_url() {
      local post_url=$1 old_prefix=$2 new_prefix=$3
      if [[ -n "$old_prefix" && "$post_url" == *"$old_prefix"* ]]; then
        printf '%s\n' "${post_url%%"$old_prefix"*}${new_prefix}${post_url#*"$old_prefix"}"
      else
        printf '%s\n' "$post_url"
      fi
    }

    # Succeed when the response headers in $1 contain a status line with the
    # HTTP status code $2, whatever the protocol version (HTTP/1.1, HTTP/2, ...).
    response_has_status() {
      local headers=$1 status=$2
      grep -qiE "^HTTP/[0-9.]+ ${status}([^0-9]|\$)" <<<"$headers"
    }

    # Succeed when the response headers in $1 contain a Location header whose
    # value starts with the URL $2. The comparison is case-insensitive and
    # literal: dots and question marks in the URL are not treated as regex
    # metacharacters.
    response_redirects_to() {
      local headers=$1 target=$2 line value
      while IFS= read -r line; do
        line=${line%$'\r'} # curl prints header lines with CRLF endings
        if [[ "${line,,}" == location:* ]]; then
          value=${line#*:}
          value=${value#"${value%%[![:space:]]*}"} # trim leading whitespace
          if [[ "${value,,}" == "${target,,}"* ]]; then
            return 0
          fi
        fi
      done <<<"$headers"
      return 1
    }

    # New URL path segment for posts. Do not include a leading or trailing slash.
    new_path='about-us/news'
    new_permastruct="/$new_path/%postname%/"

    # Group ID passed to `wp redirection import`.
    redirect_group_id=10

    # Split the site list on newlines up front, so the loop body is free to use
    # stdin for the interactive prompts.
    mapfile -t site_url_list <<<"$site_urls"

    for site_url in "${site_url_list[@]}"; do
      # An empty list still yields one empty element from the here-string.
      [[ -n "$site_url" ]] || continue

      read -rp "Do you wish to skip site $site_url? [y/n] " do_skip_site
      if [[ $do_skip_site =~ ^[Yy]$ ]]; then
        continue
      fi

      # Output directory: always start from a fresh, existing directory.
      # Removing it without recreating it would make every later append fail.
      output_dir="fix_post_urls_output/$site_url"
      rm -rf -- "${output_dir:?}"
      if ! mkdir -p -- "$output_dir"; then
        echo "Failed to create $output_dir" >&2
        exit 1
      fi

      # Clear the search replace array from previous loop.
      SR_URLS=()

      echo "$site_url"

      # CSV of redirects; cannot pre-exist because the directory is fresh.
      post_urls_file="$site_url-post-urls.csv"

      # The old URL prefix is derived from the current permalink structure, so
      # a failed lookup must not be mistaken for an empty structure.
      if ! current_permastruct=$("$wp" option get permalink_structure --url="$site_url" --path="$path"); then
        echo "Failed to read the permalink structure for $site_url" >&2
        exit 1
      fi
      old_path=${current_permastruct//\/%postname%\//}

      # Get all URLs for Posts with Post Type of "post".
      if ! post_urls=$("$wp" post list --post_type=post --post_status=publish --field=url --url="$site_url" --path="$path"); then
        echo "Failed to list the posts for $site_url" >&2
        exit 1
      fi
      mapfile -t post_url_list <<<"$post_urls"

      # Generate a CSV for redirects.
      for post_url in "${post_url_list[@]}"; do
        [[ -n "$post_url" ]] || continue

        # Create the new URL.
        new_url=$(rewrite_post_url "$post_url" "${site_url}${old_path}" "${site_url}/${new_path}")
        if [[ "$post_url" == "$new_url" ]]; then
          echo "$post_url == $new_url"
          echo 'Old/current URL is the same as the new URL. Skipping'
          printf '\n'
          # NOTE: `continue 2` abandons the rest of this site, not just this post.
          continue 2
        fi

        # This is the row in the redirect import file.
        # source URL,target URL,regex,HTTP code
        echo "$post_url,$new_url,0,301" >> "$output_dir/$post_urls_file"

        SR_URLS["$post_url"]="$new_url"
      done
      echo "Redirects generated for $site_url"

      # Change rewrite rule / permalink structure.
      read -rp "Do you wish to change permalink structure from $current_permastruct to $new_permastruct? [y/n] " do_rewrite_structure
      if [[ $do_rewrite_structure =~ ^[Yy]$ ]]; then
        res=$("$wp" rewrite structure "$new_permastruct" --url="$site_url" --path="$path")
        status=$?

        echo "$res"

        # Fail on a non-zero exit status as well as on empty output.
        if (( status != 0 )) || [[ -z "$res" ]]; then
          echo "Failed to change the permalink structure for $site_url" >&2
          exit 1
        fi
      else
        echo "Skipping rewrite structure for $site_url"
      fi

      # Perform wp search-replace.
      read -rp 'Do you wish to perform a search-replace? [y/n] ' do_replacement
      if [[ $do_replacement =~ ^[Yy]$ ]]; then
        echo "Performing search-replace for $site_url"

        for source_url in "${!SR_URLS[@]}"; do
          target_url="${SR_URLS[$source_url]}"
          if ! res=$("$wp" search-replace \
            "$source_url" \
            "$target_url" \
            --url="$site_url" \
            --path="$path" \
            --network \
            --skip-columns=guid) || [[ -z "$res" ]]; then
            echo "Failed to replace $source_url with $target_url"
            exit 1
          fi
        done
      else
        echo "Skipping search-replace for $site_url"
      fi

      # Import new redirects from file.
      read -rp "Do you wish to import new redirects for $site_url? [y/n] " do_redirect_import
      if [[ $do_redirect_import =~ ^[Yy]$ ]]; then
        if [[ ! -s "$output_dir/$post_urls_file" ]]; then
          # No post produced a redirect row, so there is no file to import.
          echo "No redirects were generated for $site_url; nothing to import"
        elif ! res=$("$wp" redirection import "$output_dir/$post_urls_file" --format=csv --group="$redirect_group_id" --url="$site_url" --path="$path") || [[ -z "$res" ]]; then
          echo "Failed to import redirects for $site_url" >&2
          exit 1
        fi
      else
        echo "Skipping redirect import for $site_url"
      fi

      # Check if redirects are successful 301.
      read -rp "Do you wish to validate the new redirects for $site_url? [y/n] " do_redirect_check
      if [[ $do_redirect_check =~ ^[Yy]$ ]]; then
        # Result files, one per outcome. They cannot pre-exist because the
        # output directory was recreated at the top of this iteration.
        redirect_no_301_file="$site_url-redirects-no-301s.csv"
        redirect_incorrect_301_file="$site_url-redirects-incorrect-301s.csv"
        redirect_target_404_file="$site_url-redirects-target-404s.csv"
        redirect_correct_301_file="$site_url-redirects-correct-301s.csv"

        for source_url in "${!SR_URLS[@]}"; do
          echo "Performing a check for $source_url"
          target_url="${SR_URLS[$source_url]}"
          # -I "Perform HEAD request", -s "Silent".
          CURL_SOURCE=$("$curl" -I -s "$source_url")
          CURL_TARGET=$("$curl" -I -s "$target_url")

          echo "$CURL_SOURCE"
          echo "$CURL_TARGET"

          if ! response_has_status "$CURL_SOURCE" 301; then
            echo "$source_url" >> "$output_dir/$redirect_no_301_file"
            echo "$source_url does not have a 301 redirect"
          elif ! response_redirects_to "$CURL_SOURCE" "$target_url"; then
            echo "$source_url" >> "$output_dir/$redirect_incorrect_301_file"
            echo "$source_url does not redirect to $target_url"
          elif response_has_status "$CURL_TARGET" 404; then
            echo "$target_url" >> "$output_dir/$redirect_target_404_file"
            echo "$target_url is a 404"
          else
            echo "$source_url,$target_url" >> "$output_dir/$redirect_correct_301_file"
            echo "$source_url successfully redirects to $target_url"
          fi
        done
      fi

      printf '\n'
    done

    echo "End fixing Post URLs $(date)"