#!/bin/bash
# wget https://gist.github.com/plutocrat/7636841e480f483a97c0d17568ac0ef1/raw/upload_clean.sh
#
# Dumps your database out, extracts image paths from it and compares it
# against a list of files from the filesystem.
# Needs wp-cli to dump the database.
# Only works on Linuxy servers (relies on GNU grep -P, du and rsync).
# It will give you a chance to bail before it deletes anything.
# If it saves you a few hours, show your appreciation with bitcoin:
#   35Ws8qgKUCiVaca7vg5NWqkfQozPfzb4my
#
# Usage: run from the WordPress root directory (the one holding wp-config.php).
#
# Flow:
#   1. List every file under wp-content/uploads/.
#   2. Export the database and grep it for upload paths.
#   3. Split the two lists into "both", "db only" and "filesystem only".
#   4. After confirmation, copy the "filesystem only" files into $IMGBK and,
#      only if that copy succeeded, delete the originals.
#   5. Optionally regenerate missing thumbnails.

# Fail on use of an unset variable; command failures are checked explicitly
# because grep legitimately returns non-zero when nothing matches.
set -u

# Working Dir
WKDIR=zz_temp
# Image Storage Dir
IMGBK=image_backup
# Where is wp-cli
WPCLI=/usr/local/bin/wp-cli
# Directory that is scanned and cleaned
UPLOADS=wp-content/uploads/

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the number of lines in file $1.
count_lines() {
  wc -l < "$1" | awk '{ print $1 }'
}

# Print the human-readable total size of directory $1.
dir_size() {
  du -sh -- "$1" | awk '{ print $1 }'
}

# Make sure we're in the right dir
if [[ -e wp-config.php ]]; then
  echo "Found wp-config.php. You're in the correct directory."
else
  die "wp-config not found. Please move this script to the correct directory and try again. "
fi

if [[ -e "$WPCLI" ]]; then
  echo "Found wp-cli"
else
  die "wp-cli not found. Please install and/or update the path in this script. "
fi

[[ -d "$UPLOADS" ]] || die "$UPLOADS not found. Nothing to clean."

# -p so that a re-run with an existing working dir does not error out.
mkdir -p -- "$WKDIR" || die "Could not create working directory $WKDIR"

# The dump contains sensitive data: make sure it is removed on every exit
# path, including an interrupted or failed run.
trap 'rm -f -- "$WKDIR/dump.sql"' EXIT

# Get list of files currently on System.
# LC_ALL=C keeps the sort order byte-wise so that it always agrees with the
# order comm expects further down.
find "$UPLOADS" -type f | LC_ALL=C sort -u > "$WKDIR/filelist.txt"

# Dump Database
"$WPCLI" db export "$WKDIR/dump.sql" --skip-themes \
  || die "Database export failed. Nothing has been changed."

# Search database for any mention of files in wp-content/uploads
grep -i -o -P "wp-content/uploads/[0-9][0-9][0-9][0-9]/[0-9][0-9]/.*?\.(jpg|png|jpeg|pdf|gif)" "$WKDIR/dump.sql" \
  | LC_ALL=C sort -u > "$WKDIR/dblist.txt"

# Delete the dump file as it contains sensitive stuff
rm -f -- "$WKDIR/dump.sql"

DBCOUNT=$(count_lines "$WKDIR/dblist.txt")
FILECOUNT=$(count_lines "$WKDIR/filelist.txt")

echo "There are $DBCOUNT files in the database and $FILECOUNT files on the filesystem"
echo "Details of these can be found in $WKDIR/dblist.txt and $WKDIR/filelist.txt"
echo

# comm requires both inputs sorted in the collation it runs under.
LC_ALL=C comm -12 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_in_both.txt"
LC_ALL=C comm -23 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_only_in_db.txt"
LC_ALL=C comm -13 "$WKDIR/dblist.txt" "$WKDIR/filelist.txt" > "$WKDIR/images_only_in_filesystem.txt"

COMMONCOUNT=$(count_lines "$WKDIR/images_in_both.txt")
DBONLYCOUNT=$(count_lines "$WKDIR/images_only_in_db.txt")
FILEONLYCOUNT=$(count_lines "$WKDIR/images_only_in_filesystem.txt")

echo "Comparing the files, there are ..."
echo " - $COMMONCOUNT files in BOTH the database and filesystem"
echo " - $DBONLYCOUNT files are ONLY found in the database. You may want to check your 404 logs for these"
echo " - $FILEONLYCOUNT files are ONLY found in the filesystem, and can probably be removed."
echo "Look in the following files for details of which files are in which group."
ls -hal -- "$WKDIR"/images_*
echo
echo "WARNING"
echo "The next step will move all files in $WKDIR/images_only_in_filesystem.txt to $IMGBK"
echo "If you want to do this manually, pres CTRL-C to end this script. Remember to clean up $WKDIR"
echo
# Refuse to continue into the destructive step if no confirmation can be
# read at all (e.g. stdin is closed).
read -r -p "Press RETURN to continue" _ \
  || die "No confirmation received. Nothing has been moved or deleted."

mkdir -p -- "$IMGBK" || die "Could not create backup directory $IMGBK"

# Copy first; the originals are only deleted when every listed file was
# transferred successfully.
rsync -a --files-from="$WKDIR/images_only_in_filesystem.txt" ./ "./$IMGBK/" \
  || die "rsync into $IMGBK failed. No originals have been deleted."

IMAGESIZE=$(dir_size "$IMGBK/")
echo
echo "$IMAGESIZE of images synced into $IMGBK"
echo "Now deleting originals"
echo

# Delete listed files
echo "Size of uploads folder before is $(dir_size "$UPLOADS")"
# IFS= and -r keep each path exactly as listed (leading/trailing blanks and
# backslashes intact); -- protects against names starting with a dash.
while IFS= read -r FILE; do
  rm -- "$FILE"
done < "$WKDIR/images_only_in_filesystem.txt"
echo "Size of uploads folder after is $(dir_size "$UPLOADS")"

# Optional
echo "If you'd like to regenerate image thumbnails, hit Enter"
echo "The process may take a while. You could run it later yourself with "
echo " wp-cli media regenerate --only-missing --skip-themes"
read -r -p "If you don't want to do this now, hit CTRL-C to end" _
"$WPCLI" media regenerate --only-missing --skip-themes
echo "Size of uploads folder after thumbnail rebuild is $(dir_size "$UPLOADS")"