#!/bin/bash
#
# Renames files in the current directory after a hash of their contents, so
# that likely duplicates end up with matching names ("<hash>.ext",
# "<hash> (1).ext", ...).
#
# Usage: <script> [PATTERN]
#   PATTERN  Case-insensitive extended regex matched against "./<file name>".
#            Defaults to DEFAULT_PATTERN.
#
# TODO: skip tiny files (so small they couldn't be photos)
# TODO: make sure sym links and other file system oddities are handled

set -u
set -o pipefail

#
# Constants
#
readonly CHAR_COUNT=12
readonly BLOCK_COUNT=3
readonly DEFAULT_PATTERN=".*\.(jpg|png|gif|mov|avi|mkv)$"

#
# Settings
#
COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives

#######################################
# Print the MD5 digest (hex only) of whatever arrives on stdin.
# Uses `md5` when it is on PATH and falls back to `md5sum` otherwise.
# Outputs: the digest on stdout
# Returns: non-zero if no hashing tool is available or hashing fails
#######################################
md5_of_stdin() {
  if command -v md5 > /dev/null 2>&1; then
    md5
  elif command -v md5sum > /dev/null 2>&1; then
    # md5sum prints "<digest>  -"; keep only the digest.
    md5sum | cut -d ' ' -f 1
  else
    echo "Neither md5 nor md5sum was found on PATH" >&2
    return 1
  fi
}

#######################################
# Print the MD5 digest of a file: the whole file when COMPUTE_FULL_HASH is
# `true`, otherwise only its first BLOCK_COUNT blocks of 512 bytes.
# Globals:   COMPUTE_FULL_HASH (read), BLOCK_COUNT (read)
# Arguments: $1 - path of the file to hash
# Outputs:   the digest on stdout
# Returns:   non-zero if the file cannot be read or hashed
#######################################
hash_file() {
  local file=$1

  if [[ $COMPUTE_FULL_HASH == true ]]; then
    md5_of_stdin < "$file"
  else
    # Stream the bytes straight into the hasher. Storing them in a shell
    # variable first would drop NUL bytes and trailing newlines, so the
    # digest would not describe the real file contents.
    dd if="$file" bs=512 count="$BLOCK_COUNT" 2> /dev/null | md5_of_stdin
  fi
}

#######################################
# Find matching files in the current directory, ask for confirmation and
# rename each file to the first CHAR_COUNT characters of its hash.
# Globals:   CHAR_COUNT, BLOCK_COUNT, DEFAULT_PATTERN (read)
# Arguments: $1 - optional regex selecting the files to rename
# Outputs:   progress messages on stdout, problems on stderr
# Returns:   0 (files that fail to hash or rename are reported and skipped)
#######################################
main() {
  local pattern=${1:-$DEFAULT_PATTERN}
  local -a files
  local candidate answer f hash short_hash base ext new_name i

  #
  # Introduction
  #
  echo "This script will get the hash of $BLOCK_COUNT 512 byte blocks for each file it processes"
  echo "The first $CHAR_COUNT chars of this hash are used to rename the file"
  echo ""

  #
  # Get list and count of files. Confirm with user if we should proceed
  #
  files=()
  # NUL-delimited so that any legal file name survives. nocasematch makes the
  # regex case-insensitive and is switched off again right after the scan so
  # later comparisons stay case-sensitive.
  shopt -s nocasematch
  while IFS= read -r -d '' candidate; do
    if [[ $candidate =~ $pattern ]]; then
      files+=("$candidate")
    fi
  done < <(find . -maxdepth 1 -type f -print0)
  shopt -u nocasematch

  echo "Found ${#files[@]} files that match the pattern $pattern"
  if ((${#files[@]} == 0)); then
    return 0
  fi

  # Anything other than an explicit "no" (including end-of-input) proceeds.
  answer=""
  read -r -p "Rename all? " answer || true
  case "$answer" in
    n | N | NO | no)
      return 0
      ;;
  esac
  echo ""

  #
  # For every file, compute a hash and rename
  #
  for f in "${files[@]}"; do
    if ! hash=$(hash_file "$f") || [[ -z $hash ]]; then
      echo "Skipping file. Could not compute its hash: $f" >&2
      continue
    fi
    short_hash=${hash:0:CHAR_COUNT}
    base=${f##*/}

    # If you've already run this script on some of these files, we shouldn't
    # duplicate them.
    if [[ $base == *"$short_hash"* ]]; then
      echo "Skipping file. Name already contains the hash of its contents: $f"
      continue
    fi

    # Keep the original extension; a name without a dot gets none.
    ext=""
    if [[ $base == *.* ]]; then
      ext=".${base##*.}"
    fi

    # If something with this name already exists, increment a number until it
    # does not. This is a likely duplicate, and the whole reason for running
    # this script.
    new_name="$short_hash$ext"
    i=0
    while [[ -e $new_name || -L $new_name ]]; do
      i=$((i + 1))
      new_name="$short_hash ($i)$ext"
    done

    echo "$new_name <- $f"
    mv -- "$f" "$new_name" || echo "Failed to rename: $f" >&2
  done

  return 0
}

main "$@"