-
-
Save cesardv/b9ae2af491e7ac7a57ad81a684b2a269 to your computer and use it in GitHub Desktop.
Revisions
-
SimplGy revised this gist
Nov 27, 2016 . 1 changed file with 10 additions and 10 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,7 @@ #!/bin/bash # TODO: skip tiny files (so small they couldn't be photos) # TODO: make sure sym links and other file system oddities are handled # TODO: look at parallelization for perf boost # # Constants @@ -9,7 +10,7 @@ CHAR_COUNT=12 BLOCK_COUNT=6 SKIP_SIZE=3 # Every new block is sampled by skipping this amount of blocks to the next position COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives DEFAULT_PATTERN=".*\.(jpg|png|gif|mov|avi|mkv|jpeg)$" # # Parameters @@ -31,10 +32,10 @@ echo "" # # Get list and count of files. Confirm with user if we should proceed # files=$(find . -maxdepth 1 -type f | egrep -i "$PATTERN") count=$(echo "$files" | wc -l | sed 's/^ *//') # The `sed` at the end removes whitespace from wc output echo "Found $count files that match the pattern $PATTERN" read -rp "Rename all? <Y/n> " prompt if [[ $prompt == "n" || $prompt == "N" || $prompt == "NO" || $prompt == "no" ]] then exit 0 @@ -49,8 +50,8 @@ for f in $files do # Hash the full file if [ $COMPUTE_FULL_HASH = true ] ; then hash=$(md5 -q "$f") # Hash an assortment of bytes else @@ -59,16 +60,15 @@ do # Skip along the file, sampling bytes as we go bytes="" for(( i=1; i<=BLOCK_COUNT; ++i )) do let BLOCK=$i*$SKIP_SIZE bytes+=$(dd if="$f" bs=512 count=1 skip=$BLOCK 2> /dev/null) done hash=$(md5 <<< "$bytes") fi shortHash=$(echo "$hash" | cut -c1-$CHAR_COUNT) ext=$(echo "$f" | sed 's/^.*\.//') # If you've already run this script on some of these files, we shouldn't duplicate them. if [[ $f == *"$shortHash"* ]] then @@ -87,6 +87,6 @@ do done echo "$newName <- $f" mv "$f" "$newName" done -
SimplGy revised this gist
Nov 27, 2016 . 1 changed file with 15 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,7 +6,8 @@ # Constants # CHAR_COUNT=12 BLOCK_COUNT=6 SKIP_SIZE=3 # Every new block is sampled by skipping this amount of blocks to the next position COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives DEFAULT_PATTERN=".*\.(jpg|png|gif|mov|avi|mkv)$" @@ -47,11 +48,22 @@ IFS=$'\n' # make newlines the only iteration separator: http://askubuntu.com/que for f in $files do # Hash the full file if [ COMPUTE_FULL_HASH = true ] ; then hash=$(md5 -q $f) # Hash an assortment of bytes else # Naive: Just grab a contiguous chunk of N blocks. But this could be all empty space or all metadata. Too many false positives. # bytes=$(dd if="$f" bs=512 count=$BLOCK_COUNT skip=$SKIP_START_BLOCKS 2> /dev/null) # Skip along the file, sampling bytes as we go bytes="" for(( i=1; i<=$BLOCK_COUNT; ++i )) do let BLOCK=$i*$SKIP_SIZE bytes+=$(dd if="$f" bs=512 count=1 skip=$BLOCK 2> /dev/null) done hash=$(md5 <<< $bytes) fi shortHash=$(echo $hash | cut -c1-$CHAR_COUNT) -
SimplGy revised this gist
Nov 27, 2016 . 1 changed file with 11 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,17 @@ CHAR_COUNT=12 BLOCK_COUNT=3 COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives DEFAULT_PATTERN=".*\.(jpg|png|gif|mov|avi|mkv)$" # # Parameters # if [ -z "$1" ] then PATTERN="$DEFAULT_PATTERN" else PATTERN=$1 fi # # Introduction -
SimplGy revised this gist
Nov 27, 2016 . No changes.There are no files selected for viewing
-
SimplGy revised this gist
Nov 27, 2016 . No changes.There are no files selected for viewing
-
SimplGy revised this gist
Nov 27, 2016 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ CHAR_COUNT=12 BLOCK_COUNT=3 COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives PATTERN=".*\.(jpg|png|gif|mov|avi|mkv)$" # # Introduction -
SimplGy created this gist
Nov 27, 2016 .There are no files selected for viewing
#!/bin/bash
#
# Rename every matching media file in the current directory to a short hash
# of its contents. Files with identical content end up next to each other as
# "<hash>.<ext>", "<hash> (1).<ext>", ... which makes duplicates easy to spot.
#
# Usage: cd into the directory to process, run the script, answer the prompt.
#
# TODO: skip tiny files (so small they couldn't be photos)
# TODO: make sure sym links and other file system oddities are handled

#
# Constants
#
CHAR_COUNT=12           # Number of leading hash characters used in the new name
BLOCK_COUNT=3           # Number of 512-byte blocks sampled from the start of a file
COMPUTE_FULL_HASH=false # Set `true` to trade processing speed for fewer false positives
PATTERN=".*\.(jpg|png|gif|mov)$"

#######################################
# Print the hex MD5 digest of stdin.
# Uses `md5` when it is installed (the tool this script was written for) and
# falls back to `md5sum`, whose output carries a trailing " -" that is cut off.
# Outputs: digest on stdout (empty if no hashing tool could run)
#######################################
md5_of_stdin() {
  if command -v md5 > /dev/null 2>&1; then
    md5
  else
    md5sum | cut -d ' ' -f 1
  fi
}

#######################################
# Print the short hash used to name a file.
# Globals:   COMPUTE_FULL_HASH, BLOCK_COUNT, CHAR_COUNT (read)
# Arguments: $1 - path of the file to hash
# Outputs:   first CHAR_COUNT characters of the digest on stdout
# Returns:   non-zero if the file is unreadable or no digest was produced
#######################################
short_hash_of() {
  local file=$1
  local hash sample

  [[ -r "$file" ]] || return 1

  if [[ "$COMPUTE_FULL_HASH" == true ]]; then
    hash=$(md5_of_stdin < "$file") || return 1
  else
    # A shell variable cannot hold NUL bytes, so they are stripped explicitly
    # before the capture; this keeps the result the same on every bash version
    # and avoids the "ignored null byte" warning newer versions print.
    sample=$(dd if="$file" bs=512 count="$BLOCK_COUNT" 2> /dev/null \
      | LC_ALL=C tr -d '\0')
    hash=$(md5_of_stdin <<< "$sample") || return 1
  fi

  [[ -n "$hash" ]] || return 1
  printf '%s\n' "${hash:0:$CHAR_COUNT}"
}

#######################################
# Print a file name for the given hash and extension that is not taken yet.
# If "<hash>.<ext>" exists, a counter is appended: "<hash> (1).<ext>", ...
# An existing name is a likely duplicate, which is the whole reason for
# running this script.
# Arguments: $1 - short hash, $2 - extension without the dot
# Outputs:   the free name on stdout
#######################################
available_name() {
  local stem=$1
  local ext=$2
  local candidate="$stem.$ext"
  local n=0

  while [[ -e "$candidate" ]]; do
    n=$((n + 1))
    candidate="$stem ($n).$ext"
  done
  printf '%s\n' "$candidate"
}

#######################################
# Entry point: list matching files, confirm with the user, rename each file.
# Globals: PATTERN, BLOCK_COUNT, CHAR_COUNT (read)
# Returns: 0 on success or when there is nothing to do / the user declines,
#          1 if a hashing tool is missing or any file could not be processed
#######################################
main() {
  local -a files
  local file answer short ext new_name
  local failures=0
  files=()

  #
  # Introduction
  #
  echo "This script will get the hash of $BLOCK_COUNT 512 byte blocks for each file it processes"
  echo "The first $CHAR_COUNT chars of this hash are used to rename the file"
  echo ""

  if ! command -v md5 > /dev/null 2>&1 && ! command -v md5sum > /dev/null 2>&1; then
    echo "Neither md5 nor md5sum was found in PATH" >&2
    return 1
  fi

  #
  # Get list and count of files. Confirm with user if we should proceed
  #
  # Reading line by line into an array keeps names with spaces intact without
  # changing IFS for the rest of the script.
  while IFS= read -r file; do
    files+=("$file")
  done < <(find . -maxdepth 1 -type f | grep -E -i -- "$PATTERN")

  echo "Found ${#files[@]} files that match the pattern $PATTERN"
  if (( ${#files[@]} == 0 )); then
    return 0 # nothing to rename, so do not bother the user with a prompt
  fi

  # An empty answer (or end of input) counts as "yes", matching the <Y/n> hint.
  read -rp "Rename all? <Y/n> " answer
  if [[ $answer == "n" || $answer == "N" || $answer == "NO" || $answer == "no" ]]; then
    return 0
  fi
  echo ""

  #
  # For every file, compute a hash and rename
  #
  for file in "${files[@]}"; do
    if ! short=$(short_hash_of "$file"); then
      echo "Could not hash file, leaving it untouched: $file" >&2
      failures=$((failures + 1))
      continue
    fi
    ext=${file##*.}

    # If you've already run this script on some of these files, we shouldn't duplicate them.
    if [[ $file == *"$short"* ]]; then
      printf 'Skipping file. \nName already contains the hash of its contents: %s\n' "$file"
      continue
    fi

    new_name=$(available_name "$short" "$ext")
    echo "$new_name <- $file"
    if ! mv -- "$file" "$new_name"; then
      failures=$((failures + 1))
    fi
  done

  if (( failures > 0 )); then
    return 1
  fi
  return 0
}

main "$@"