#!/bin/bash
#
# Iterate through the ~13K PDFs that diff flagged as differing and work out
# which pairs really differ once rendered.
#
# Usage: <this script> DIFF_OUTPUT_FILE
#
# DIFF_OUTPUT_FILE contains output from diff, one pair per line.
#
# Sample output from diff:
#
#Binary files /contracts/prod/attachments/offers_2713-2812/271233/618932.pdf and /contracts2/prod/attachments/offers_2713-2812/271233/618932.pdf differ
#Binary files /contracts/prod/attachments/offers_2713-2812/271243/619378.pdf and /contracts2/prod/attachments/offers_2713-2812/271243/619378.pdf differ
#
# For every listed pair where both files exist, ImageMagick `compare` writes a
# difference image, Ghostscript rasterises it, and the raster is checked
# against a blank page of the same size:
#   * raster equals the blank page -> "cp F1 F2" is appended to /root/runme.sh
#   * raster differs               -> "F1 F2" is appended to /root/imagediff.out
#   * a tool failed                -> warning on stderr, nothing is recorded
#
# Requires: compare and identify (ImageMagick), gs (Ghostscript), cmp, mktemp.

set -u

readonly RUNME_FILE=/root/runme.sh
readonly IMAGEDIFF_FILE=/root/imagediff.out

# Scratch directory for the intermediate images; created in main().
workdir=''

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a non-fatal diagnostic to stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$workdir" ]]; then
    rm -rf -- "$workdir"
  fi
}

#######################################
# Render the visual difference between two files and test whether it is blank.
# Arguments: $1 - first file, $2 - second file, $3 - scratch directory
# Outputs:   whatever compare/identify/gs print (not captured)
# Returns:   0 if the difference image equals a blank page,
#            1 if it does not,
#            2 if any tool failed or produced no output
#######################################
images_match() {
  local f1=$1 f2=$2 dir=$3
  local diff_pdf="$dir/diff.pdf"
  local diff_ppm="$dir/diff.ppm"
  local white_ppm="$dir/white.ppm"
  local geometry
  local rv=0

  # Start clean so a failed run can never reuse the previous pair's images.
  rm -f -- "$diff_pdf" "$diff_ppm" "$white_ppm"

  # compare exits 1 when the images merely differ; only >1 is a real error.
  compare "$f1" "$f2" -compose src "$diff_pdf" || rv=$?
  if (( rv > 1 )) || [[ ! -s "$diff_pdf" ]]; then
    return 2
  fi

  # Page size of the difference image, used for both Ghostscript renders.
  geometry=$(identify -format '%[fx:(w)]x%[fx:(h)]' "$diff_pdf") || return 2
  [[ -n "$geometry" ]] || return 2

  # Rasterise the difference image, then a blank page of the same size.
  gs -o "$diff_ppm" -sDEVICE=ppmraw -r72 "-g${geometry}" "$diff_pdf" \
    || return 2
  gs -o "$white_ppm" -sDEVICE=ppmraw -r72 "-g${geometry}" -c "showpage" \
    || return 2

  # Missing or empty rasters must not be mistaken for "identical".
  [[ -s "$diff_ppm" && -s "$white_ppm" ]] || return 2

  if cmp -s "$diff_ppm" "$white_ppm"; then
    return 0
  fi
  return 1
}

#######################################
# Entry point: validate the argument and process every listed pair.
# Arguments: $1 - file containing diff output
#######################################
main() {
  # Check to see that yes, we actually got an argument...
  [[ $# -eq 1 ]] || die "1 argument required, $# provided"

  local list=$1
  [[ -r "$list" ]] || die "cannot read diff output file: $list"

  local tool
  for tool in compare identify gs cmp mktemp; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  workdir=$(mktemp -d) || die "mktemp failed"
  trap cleanup EXIT

  # Kept in a variable so the regex is not subject to shell quoting rules.
  local line_re='^[[:space:]]*Binary files (.+) and (.+) differ[[:space:]]*$'
  local line f1 f2 rv

  # The list is read on fd 3 so the external tools cannot consume it via
  # stdin; the `|| [[ -n ... ]]` keeps a final line lacking a newline.
  while IFS= read -r -u 3 line || [[ -n "$line" ]]; do
    [[ "$line" =~ $line_re ]] || continue
    f1=${BASH_REMATCH[1]}
    f2=${BASH_REMATCH[2]}

    # Check to make sure both files exist..
    [[ -f "$f1" && -f "$f2" ]] || continue

    rv=0
    images_match "$f1" "$f2" "$workdir" || rv=$?
    case "$rv" in
      0)
        # %q escapes the paths so the generated script is safe to run; plain
        # paths come out unchanged.
        printf 'cp %q %q\n' "$f1" "$f2" >> "$RUNME_FILE"
        ;;
      1)
        # The files are different...
        printf '%s %s\n' "$f1" "$f2" >> "$IMAGEDIFF_FILE"
        ;;
      *)
        warn "could not compare, skipping: $f1 $f2"
        ;;
    esac
  done 3< "$list"
}

main "$@"