Skip to content

Instantly share code, notes, and snippets.

@fieri
Forked from scf37/simulate_scanned_PDF.md
Created April 11, 2022 05:13
Show Gist options
  • Select an option

  • Save fieri/02d40eaaa733976a2d53f18bea5033c0 to your computer and use it in GitHub Desktop.

Select an option

Save fieri/02d40eaaa733976a2d53f18bea5033c0 to your computer and use it in GitHub Desktop.
Simulate a scanned PDF with ImageMagick

Simulate a scanned PDF with ImageMagick

  1. Download and install ImageMagick + Convert Module
  2. Execute the following command in the console (change the filename)
$ convert -density 200 INPUT.pdf -rotate 0.3 +noise Multiplicative -format pdf  -quality 85 -compress JPEG -colorspace gray OUTPUT.pdf

Description of the options

Batch-Scripts

Python 3

#!/usr/bin/env python3
import os
import sys
import subprocess

'''
If you want to use Drag and Drop, add this to the Windows registry (or put it into a .reg file and execute that file):

Windows Registry Editor Version 5.00

[HKEY_CLASSES_ROOT\Python.File\shellex\DropHandler]
@="{86C86720-42A0-1069-A2E8-08002B30309D}"
'''

# Important for Drag and Drop! Change the current working directory to the
# directory of the script, so the relative 'input' and 'output' folders below
# resolve next to the script no matter where it was launched from.
os.chdir(os.path.dirname(os.path.realpath(__file__)))

# Both directories keep a trailing path separator because the rest of the
# script builds file paths by plain concatenation (output_dir + file name).
# Joining with '' appends the platform's separator instead of a hard-coded
# backslash, which is not a path separator outside Windows.
default_input_dir = os.path.join(os.path.abspath('input'), '')
output_dir = os.path.join(os.path.abspath('output'), '')

def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
    """Draw a one-line text progress bar on stdout.

    The line starts with a carriage return so that repeated calls overwrite
    the previous bar. A newline is written once ``iteration == total``.

    Args:
        iteration: Number of items finished so far.
        total: Number of items overall. ``0`` is rendered as a completed bar
            instead of raising ZeroDivisionError.
        prefix: Text printed in front of the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        barLength: Width of the bar in characters.
    """
    if total:
        percent_value = 100 * (iteration / float(total))
        filled_length = int(round(barLength * iteration / float(total)))
    else:
        # Nothing to process counts as "done"; dividing by zero here used to
        # abort the whole run for directories without any matching file.
        percent_value = 100.0
        filled_length = barLength

    percent = "{0:.{1}f}".format(percent_value, decimals)
    bar = '█' * filled_length + '-' * (barLength - filled_length)
    sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percent, '%', suffix))
    if iteration == total:
        sys.stdout.write('\n')
    sys.stdout.flush()

def process(input_filename, output_filename):
    """Convert one PDF into a simulated scan using ImageMagick's ``convert``.

    The output directory (module-level ``output_dir``) is created when it is
    missing. Anything ``convert`` reports is printed: its stderr if there is
    any, otherwise its stdout.

    Args:
        input_filename: Path of the PDF to read.
        output_filename: Path of the PDF to write.
    """
    # Local import: the file's top-level import block does not provide shutil.
    import shutil

    os.makedirs(output_dir, exist_ok=True)

    # Resolve the executable through PATH the same way the shell did, so that
    # dropping shell=True does not change which 'convert' gets picked up.
    executable = shutil.which('convert') or 'convert'

    # An argument list (instead of one interpolated shell string) keeps file
    # names containing quotes or shell metacharacters intact.
    command = [
        executable,
        '-density', '200',
        input_filename,
        '-rotate', '0.3',
        '+noise', 'Multiplicative',
        '-format', 'pdf',
        '-quality', '85',
        '-compress', 'JPEG',
        '-colorspace', 'gray',
        output_filename,
    ]

    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except OSError as error:
        # E.g. 'convert' is not installed. Report it and let the caller go on
        # with the next file, as the shell-based invocation used to.
        print(error)
        return

    # communicate()/run() yield (stdout, stderr) in that order; the streams
    # were previously unpacked the other way round and stderr was not piped.
    if result.stderr:
        print(result.stderr)
    elif result.stdout:
        print(result.stdout)

def walkDir(dir):
    """Convert every PDF found below ``dir``, showing a progress bar per folder.

    Each folder visited by ``os.walk`` is handled on its own: its PDF files
    are passed to ``process`` one by one and written to ``output_dir`` under
    their original file name. Folders without PDF files are skipped.

    Args:
        dir: Directory to search recursively.
    """
    for root, _subdirs, names in os.walk(dir):
        # Compare the extension case-insensitively so 'SCAN.PDF' is found too.
        pdf_names = [name for name in names if name.lower().endswith(".pdf")]
        if not pdf_names:
            # Drawing a bar for zero files would divide by zero.
            continue

        total = len(pdf_names)
        printProgress(0, total, prefix='Progress:', suffix='Complete', barLength=50)
        for done, name in enumerate(pdf_names, start=1):
            process(os.path.join(root, name), output_dir + name)
            printProgress(done, total, prefix='Progress:', suffix='Complete', barLength=50)

def main():
    """Process the paths given on the command line, or the default input folder.

    Without arguments the default input directory is walked. Otherwise each
    argument is made absolute and handled by type: directories are walked,
    single PDF files are converted into ``output_dir`` under their own name,
    and anything else is reported as unknown.
    """
    if len(sys.argv) == 1:
        print("Process default Input-Directory: "+default_input_dir)
        walkDir(default_input_dir)
        return

    for argument in sys.argv[1:]:
        path = os.path.abspath(argument)
        if os.path.isdir(path):
            print("Processing Directory: "+path)
            walkDir(path)
        # Case-insensitive extension check: a dropped 'SCAN.PDF' is a PDF too.
        elif os.path.isfile(path) and path.lower().endswith(".pdf"):
            print("Processing single File: "+path)
            process(path, output_dir + os.path.basename(path))
        else:
            print("Unknown Path-Type!")

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    except Exception as error:
        # Show the message and wait for Enter before the script exits.
        print(error)
        input()
    except KeyboardInterrupt:
        # Ctrl+C: leave quietly without an error message.
        pass

PowerShell

# Convert every PDF in the "Input" folder next to this script into a simulated
# scan, written to the "Output" folder under the same file name.
$scriptPath = Split-Path -Parent $MyInvocation.MyCommand.Definition

$input_directory = $scriptPath + "\Input\"
$output_directory = $scriptPath + "\Output\"

# -Force keeps this from failing when the output folder already exists.
New-Item -ItemType Directory -Force -Path $output_directory

$files = Get-ChildItem $input_directory -Filter *.pdf

ForEach ($file in $files) {
    "Process: " + $file.Name

    # Hand every argument over as its own array element and call convert with
    # the call operator. Building one command string for Invoke-Expression
    # broke on file names containing a single quote and evaluated file-name
    # content as script.
    $arguments = @(
        '-density', '200',
        ($input_directory + $file.Name),
        '-rotate', '0.3',
        '+noise', 'Multiplicative',
        '-format', 'pdf',
        '-quality', '85',
        '-compress', 'JPEG',
        '-colorspace', 'gray',
        ($output_directory + $file.Name)
    )
    & convert @arguments
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment