#!/bin/bash
# TODO: skip tiny files (so small they couldn't be photos)
# TODO: make sure symlinks and other filesystem oddities are handled

#
# Constants
#
# Extended regex used to select files when no pattern argument is supplied
DEFAULT_PATTERN='.*\.(jpg|png|gif|mov|avi|mkv)$'
# Number of 512 byte blocks read from the start of each file for hashing
BLOCK_COUNT=3
# Number of leading hash characters that make up the new file name
CHAR_COUNT=12
# Set `true` to trade processing speed for fewer false positives
COMPUTE_FULL_HASH='false'

#
# Parameters
#
# The first command-line argument is the match pattern; when it is absent or
# empty the default pattern is used instead.
PATTERN=${1:-$DEFAULT_PATTERN}

#
# Introduction
#
# Tell the user what is about to happen; the trailing empty argument prints a blank line.
printf '%s\n' \
  "This script will get the hash of $BLOCK_COUNT 512 byte blocks for each file it processes" \
  "The first $CHAR_COUNT chars of this hash are used to rename the file" \
  ""

#
# Get list and count of files. Confirm with user if we should proceed
#
# Collect the regular files directly inside the current directory whose path
# matches PATTERN (case-insensitive extended regex). The pattern is quoted so
# the shell cannot glob-expand or word-split it before grep sees it.
# Known limitation: the list is newline-delimited, so file names that contain
# a newline are not supported.
files=$(find . -maxdepth 1 -type f | grep -E -i -- "$PATTERN")

# Count non-empty lines. `wc -l` on an empty list would still report 1 because
# of the newline that echo / a here-string appends.
count=$(grep -c . <<< "$files")
echo "Found $count files that match the pattern $PATTERN"

# Nothing matched: there is nothing to confirm and nothing to rename.
if (( count == 0 )); then
  echo "Nothing to rename."
  exit 0
fi

# Any capitalisation of "n" / "no" declines; every other answer (including
# just pressing Enter) proceeds. -r keeps backslashes in the reply literal.
read -r -p "Rename all? <Y/n> " prompt
if [[ "$prompt" =~ ^[Nn][Oo]?$ ]]; then
  exit 0
fi
echo ""

#
# For every file, compute a hash and rename
#
# Print the MD5 hex digest of whatever arrives on stdin.
# Uses BSD/macOS `md5` when it is installed and falls back to GNU `md5sum`.
# Returns non-zero when neither tool is available.
md5_of_stdin() {
  local digest
  if command -v md5 > /dev/null 2>&1; then
    md5 -q
  elif command -v md5sum > /dev/null 2>&1; then
    digest=$(md5sum) || return
    printf '%s\n' "${digest%% *}" # md5sum prints "<digest>  -"; keep the digest only
  else
    echo "Neither md5 nor md5sum is available" >&2
    return 1
  fi
}

# Print the digest used to name file $1.
# Reads COMPUTE_FULL_HASH (hash the whole file when it is `true`) and
# BLOCK_COUNT (otherwise hash only that many leading 512 byte blocks).
hash_of_file() {
  local file=$1
  if [[ "$COMPUTE_FULL_HASH" == true ]]; then
    md5_of_stdin < "$file"
  else
    # Stream the blocks straight into the hasher. Binary data must not pass
    # through a shell variable: NUL bytes and trailing newlines would be lost,
    # so the digest would not be that of the real file contents.
    dd if="$file" bs=512 count="$BLOCK_COUNT" 2> /dev/null | md5_of_stdin
  fi
}

# Rename file $1 (in the current directory) to "<short hash>.<ext>".
# Reads CHAR_COUNT for the length of the short hash.
# Returns non-zero when the file cannot be hashed or moved.
rename_file_to_hash() {
  local file=$1
  local digest short_hash base suffix new_name i

  if [[ ! -f "$file" || ! -r "$file" ]]; then
    echo "Skipping file. It is not a readable regular file: $file" >&2
    return 1
  fi
  # An empty digest would make the "already renamed" test below match every
  # file, so treat it as an error instead.
  if ! digest=$(hash_of_file "$file") || [[ -z "$digest" ]]; then
    echo "Skipping file. Could not compute its hash: $file" >&2
    return 1
  fi

  short_hash=${digest:0:$CHAR_COUNT}
  base=${file##*/}

  # If you've already run this script on some of these files, we shouldn't duplicate them.
  # NOTE: earlier versions of this script hashed a NUL-stripped copy of the data,
  # so names they produced for binary files may not match and get renamed once more.
  if [[ "$base" == *"$short_hash"* ]]; then
    echo "Skipping file. Name already contains the hash of its contents: $file"
    return 0
  fi

  # Take the extension from the base name only, so that the leading "./" of
  # an extension-less path is never mistaken for an extension separator.
  if [[ "$base" == *.* ]]; then
    suffix=".${base##*.}"
  else
    suffix=""
  fi
  new_name="$short_hash$suffix"

  # If something with this name already exists, increment a number until it does not.
  # This is a likely duplicate, and the whole reason for running this script.
  # -e / -L also catch directories and dangling symlinks, which `-f` would miss.
  i=0
  while [[ -e "$new_name" || -L "$new_name" ]]; do
    i=$((i + 1))
    new_name="$short_hash ($i)$suffix"
  done

  echo "$new_name   <-   $file"
  mv -- "$file" "$new_name"
}

# Rename every file in the newline-delimited list $1.
# The list is read on file descriptor 3 so that commands inside the loop
# cannot accidentally consume it from stdin; empty lines are ignored.
rename_all_to_hash() {
  local f
  while IFS= read -r f <&3; do
    [[ -n "$f" ]] || continue
    rename_file_to_hash "$f"
  done 3<<< "$1"
}

rename_all_to_hash "$files"