Skip to content

Instantly share code, notes, and snippets.

@sergio
Forked from brechtm/diffpdf.sh
Created April 7, 2017 18:06
Show Gist options
  • Select an option

  • Save sergio/7beee661555daf0d6661a815a84e4ef3 to your computer and use it in GitHub Desktop.

Select an option

Save sergio/7beee661555daf0d6661a815a84e4ef3 to your computer and use it in GitHub Desktop.

Revisions

  1. @brechtm brechtm revised this gist Jul 7, 2016. 1 changed file with 45 additions and 10 deletions.
    55 changes: 45 additions & 10 deletions diffpdf.sh
    Original file line number Diff line number Diff line change
    @@ -1,15 +1,16 @@
    #!/bin/sh
    #!/bin/bash

    # usage: diffpdf.sh file_1.pdf file_2.pdf
    # usage: diffpdf.sh file_1.pdf file_2.pdf

    # requirements:
    # - ImageMagick
    # - Poppler's pdftoppm and pdfinfo tools (works with 0.41.0, NOT with 0.42.0)
    # - Poppler's pdftoppm and pdfinfo tools (works with 0.18.4 and 0.41.0,
    # fails with 0.42.0)
    # (could be replaced with Ghostscript if speed is
    # not important - see commented commands below)

    DIFFDIR="pdfdiff" # directory to place diff images in
    MAXPROCS=8 # number of parallel processes
    DIFFDIR="pdfdiff" # directory to place diff images in
    MAXPROCS=$(getconf _NPROCESSORS_ONLN) # number of parallel processes

    pdf_file1=$1
    pdf_file2=$2
    @@ -44,12 +45,15 @@ function diff_page {
    # -highlight-color blue pdfdiff/$page_number.png

    if (($? > 0)); then
    echo "Problem running pdftoppm or convert!"
    exit 1
    fi
    grayscale=$(convert pdfdiff/$page_number.jpg -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info:)
    if [ "$grayscale" != "0" ]; then
    echo "page $page_number ($grayscale)"
    fi
    return 1
    fi
    return 0
    }

    function num_pages {
    @@ -75,6 +79,7 @@ min_pages=$(minimum $pdf1_num_pages $pdf2_num_pages)

    if [ "$pdf1_num_pages" -ne "$pdf2_num_pages" ]; then
    echo "PDF files have different lengths ($pdf1_num_pages and $pdf2_num_pages)"
    rc=1
    fi

    if [ -d "$DIFFDIR" ]; then
    @@ -83,12 +88,42 @@ else
    mkdir $DIFFDIR
    fi


    # get exit status from subshells http://stackoverflow.com/a/29535256/438249

    # Wait for every process ID given as an argument.
    #
    # Arguments:
    #   $@ - process IDs of background jobs started by this shell
    # Returns:
    #   0 if `wait` succeeded for every given PID, 1 if it failed for at
    #   least one of them (all PIDs are still waited for in that case).
    function wait_for_processes {
        local status=0
        local pid

        for pid in "$@"; do
            # wait hands back the exit status of the process it waited for
            wait "$pid" || status=1
        done
        return "$status"
    }

    # Print the number of arguments the function was called with.
    # Called with an unquoted list, this counts its whitespace-separated words.
    function howmany() {
        printf '%s\n' "$#"
    }

    rc=0
    pids=""
    for page_number in `seq 1 $min_pages`;
    do
    diff_page $pdf_file1 $pdf_file2 $page_number &
    # echo "$page_number \c"
    if [ "$(($page_number % $MAXPROCS))" -eq "0" ]; then
    wait
    pids+=" $!"
    if [ $(howmany $pids) -eq "$MAXPROCS" ]; then
    if ! wait_for_processes $pids; then
    rc=1
    fi
    pids=""
    fi
    done
    wait

    if ! wait_for_processes $pids; then
    rc=1
    fi

    exit $rc
  2. @brechtm brechtm revised this gist Jul 7, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion diffpdf.sh
    Original file line number Diff line number Diff line change
    @@ -4,7 +4,7 @@

    # requirements:
    # - ImageMagick
    # - Poppler's pdftoppm and pdfinfo tools
    # - Poppler's pdftoppm and pdfinfo tools (works with 0.41.0, NOT with 0.42.0)
    # (could be replaced with Ghostscript if speed is
    # not important - see commented commands below)

  3. @brechtm brechtm revised this gist Mar 31, 2016. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion diffpdf.sh
    Original file line number Diff line number Diff line change
    @@ -4,7 +4,9 @@

    # requirements:
    # - ImageMagick
    # - poppler's pdftoppm (or Ghostscript if speed is not important)
    # - Poppler's pdftoppm and pdfinfo tools
    # (could be replaced with Ghostscript if speed is
    # not important - see commented commands below)

    DIFFDIR="pdfdiff" # directory to place diff images in
    MAXPROCS=8 # number of parallel processes
  4. @brechtm brechtm revised this gist Mar 31, 2016. 1 changed file with 7 additions and 1 deletion.
    8 changes: 7 additions & 1 deletion diffpdf.sh
    Original file line number Diff line number Diff line change
    @@ -60,6 +60,12 @@ function minimum {
    echo $(( $1 < $2 ? $1 : $2 ))
    }

    # guard against accidental deletion of files in the root directory
    if [ -z "$DIFFDIR" ]; then
    echo "DIFFDIR needs to be set!"
    exit 1
    fi

    pdf1_num_pages=$(num_pages $pdf_file1)
    pdf2_num_pages=$(num_pages $pdf_file2)

    @@ -70,7 +76,7 @@ if [ "$pdf1_num_pages" -ne "$pdf2_num_pages" ]; then
    fi

    if [ -d "$DIFFDIR" ]; then
    rm -rf $DIFFDIR/*
    rm -f $DIFFDIR/*
    else
    mkdir $DIFFDIR
    fi
  5. @brechtm brechtm revised this gist Mar 18, 2016. No changes.
  6. @brechtm brechtm created this gist Mar 18, 2016.
    86 changes: 86 additions & 0 deletions diffpdf.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,86 @@
    #!/bin/bash

    # usage: diffpdf.sh file_1.pdf file_2.pdf
    #
    # This script uses bash-only syntax (the `function` keyword and `(( ))`
    # arithmetic), so it has to be run by bash rather than a plain POSIX sh.

    # requirements:
    # - ImageMagick
    # - poppler's pdftoppm and pdfinfo
    #   (pdftoppm could be replaced by Ghostscript if speed is not important)

    DIFFDIR="pdfdiff" # directory to place diff images in
    MAXPROCS=8 # number of parallel processes

    # the two PDF files to compare, taken from the command line
    pdf_file1=$1
    pdf_file2=$2

    # Render one page of two PDF files and combine both renderings into a
    # single image, then report the page if that image is not colorless.
    #
    # based on http://stackoverflow.com/a/33673440/438249
    #
    # Globals:
    #   DIFFDIR - directory the combined image is written to (read)
    # Arguments:
    #   $1 - first PDF file
    #   $2 - second PDF file
    #   $3 - page number to compare (passed to pdftoppm -f)
    # Outputs:
    #   "page N (VALUE)" on stdout when the mean computed from the combined
    #   image is not "0"; a diagnostic on stderr when rendering fails.
    # Returns:
    #   1 if the final convert of the rendering pipeline fails, 0 otherwise.
    function diff_page {
        local pdf_file1=$1
        local pdf_file2=$2
        local page_number=$3
        # zero-based index; only used by the commented-out alternatives below
        local page_index=$((page_number - 1))
        local diff_image="$DIFFDIR/$page_number.jpg"
        local grayscale

        # 2+x faster
        # Both gray renderings are streamed as MIFF into one convert call, which
        # adds a darken-composite of the two and combines the three images into
        # the channels of a single RGB image.
        if ! (
            pdftoppm -f "$page_number" -singlefile -gray - < "$pdf_file1" | convert - miff:-
            pdftoppm -f "$page_number" -singlefile -gray - < "$pdf_file2" | convert - miff:-
        ) | convert - \( -clone 0-1 -compose darken -composite \) \
                -channel RGB -combine "$diff_image"; then
            echo "Problem running pdftoppm or convert!" >&2
            # return (not exit) so a foreground caller is not terminated; for a
            # backgrounded call the resulting job status is the same
            return 1
        fi

        # 2x faster (breaks when using TIFF format instead of JPEG, and PNG is slow)
        # (pdftocairo -f $page_number -singlefile -jpeg $pdf_file1 -gray - | convert - miff:- ; \
        # pdftocairo -f $page_number -singlefile -jpeg $pdf_file2 -gray - | convert - miff:- ) | \
        # convert - \( -clone 0-1 -compose darken -composite \) \
        # -channel RGB -combine $DIFFDIR/$page_number.jpg

        # 1x (using Ghostscript for PDF to bitmap conversion)
        # convert -respect-parenthesis \
        # \( $pdf_file1[$page_index] -flatten -colorspace gray \) \
        # \( $pdf_file2[$page_index] -flatten -colorspace gray \) \
        # \( -clone 0-1 -compose darken -composite \) \
        # -channel RGB -combine $DIFFDIR/$page_number.jpg

        # compare $pdf_file1[$page_index] $pdf_file2[$page_index] \
        # -highlight-color blue pdfdiff/$page_number.png

        # Mean of the second HSL channel (presumably saturation - confirm against
        # the ImageMagick docs): identical pages give a gray image and thus "0".
        # Read the image from DIFFDIR, the same place it was just written to.
        grayscale=$(convert "$diff_image" -colorspace HSL -channel g -separate +channel -format "%[fx:mean]" info:)
        if [ "$grayscale" != "0" ]; then
            echo "page $page_number ($grayscale)"
        fi
        return 0
    }

    # Print the page count of a PDF file as reported by pdfinfo.
    #
    # Arguments:
    #   $1 - path to the PDF file (may contain spaces)
    # Outputs:
    #   The second field of pdfinfo's "Pages:" line on stdout; nothing if
    #   pdfinfo prints no such line.
    function num_pages {
        local pdf_file=$1

        # Match on the first field so a metadata value that merely contains
        # "Pages:" (e.g. in the title) is not mistaken for the page count.
        pdfinfo "$pdf_file" | awk '$1 == "Pages:" { print $2; exit }'
    }

    # Print the smaller of two integers.
    #
    # Arguments:
    #   $1, $2 - the integers to compare
    function minimum {
        local smaller

        smaller=$(( $1 < $2 ? $1 : $2 ))
        printf '%s\n' "$smaller"
    }

    pdf1_num_pages=$(num_pages "$pdf_file1")
    pdf2_num_pages=$(num_pages "$pdf_file2")

    # Without two numeric page counts none of the comparisons below can work
    if ! [[ "$pdf1_num_pages" =~ ^[0-9]+$ && "$pdf2_num_pages" =~ ^[0-9]+$ ]]; then
        echo "Could not determine the number of pages of both PDF files" >&2
        exit 1
    fi

    # Only the pages present in both files can be compared
    min_pages=$(minimum "$pdf1_num_pages" "$pdf2_num_pages")

    if [ "$pdf1_num_pages" -ne "$pdf2_num_pages" ]; then
        echo "PDF files have different lengths ($pdf1_num_pages and $pdf2_num_pages)"
    fi

    # Start from an empty output directory. The :? expansion aborts the script
    # if DIFFDIR is empty, so the rm below can never expand to "/*".
    if [ -d "${DIFFDIR:?DIFFDIR needs to be set!}" ]; then
        rm -rf -- "${DIFFDIR:?}"/*
    else
        mkdir -p -- "$DIFFDIR"
    fi

    # Compare the pages in parallel, in batches of MAXPROCS background jobs.
    # Each job is waited for by PID so a failing page is reflected in the
    # exit status instead of being silently dropped.
    rc=0
    pids=()
    for (( page_number = 1; page_number <= min_pages; page_number++ )); do
        diff_page "$pdf_file1" "$pdf_file2" "$page_number" &
        pids+=("$!")
        # echo "$page_number \c"
        if (( page_number % MAXPROCS == 0 || page_number == min_pages )); then
            for pid in "${pids[@]}"; do
                wait "$pid" || rc=1
            done
            pids=()
        fi
    done

    exit "$rc"