Last active
May 14, 2020 15:01
-
-
Save ablwr/83005f4b01cfdd1097a7f6fd70ff21f4 to your computer and use it in GitHub Desktop.
Revisions
-
ablwr renamed this gist
May 14, 2020. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
ablwr created this gist
May 14, 2020. There are no files selected for viewing
#!/bin/bash
#
# Mirror every PDF below the current directory into a tree of plain-text
# files, converting each one with `pdftotext -enc ASCII7`.
#
# Usage: cd into the folder that holds the PDFs, then run this script.
#
# A PDF is (re)converted only when its .txt counterpart is missing or older
# than the PDF, so the script can be re-run cheaply after adding files.
# The relative path of every PDF that gets converted is printed on stdout.
#
# Environment:
#   OCR_BASEFOLDER  optional override for the output prefix (see below).
#
# `set -e` is deliberately not enabled: one unreadable PDF must not abort the
# whole batch. Failures are reported on stderr and reflected in main's return
# status instead.

# Output prefix. It is glued directly in front of each relative PDF path; no
# "/" is inserted between the two.
# NOTE(review): the default has no trailing slash, so "sub/a.pdf" is written
# to ".../ocrsub/a.txt" rather than ".../ocr/sub/a.txt". That looks
# unintended, but the default is kept so existing output stays where the
# freshness check expects it. Set OCR_BASEFOLDER with a trailing "/" to write
# inside a folder.
basefolder="${OCR_BASEFOLDER:-/home/ashley/Development/personal/vasulka-archive-archive/ocr}"

#######################################
# Print the text-file path that corresponds to a PDF.
# Globals:   basefolder (read)
# Arguments: $1 - PDF path relative to the scan root, e.g. "dir/file.pdf"
# Outputs:   "<basefolder><path minus its final extension>.txt" on stdout
#######################################
text_path_for() {
  # Strip only the trailing extension. A ".pdf" earlier in the path (for
  # example a folder called "scans.pdf.d") must be left untouched.
  printf '%s%s.txt\n' "$basefolder" "${1%.*}"
}

#######################################
# Convert one PDF unless an up-to-date text file already exists.
# Globals:   basefolder (read, via text_path_for)
# Arguments: $1 - PDF path as printed by `find .` (with leading "./")
# Outputs:   the relative PDF path on stdout when a conversion is attempted
# Returns:   0 if skipped or converted, non-zero if mkdir or pdftotext failed
#######################################
convert_if_stale() {
  local src=$1
  local rel=${src#./}
  local out out_dir

  out=$(text_path_for "$rel") || return

  # Up to date: the text file exists and the PDF is not newer than it.
  if [[ -f "$out" && ! "$src" -nt "$out" ]]; then
    return 0
  fi

  printf '%s\n' "$rel"

  # Create the output directory only when the output path really has a
  # directory part; a slash-less prefix yields a bare file name.
  if [[ "$out" == */* ]]; then
    out_dir=${out%/*}
    if [[ -n "$out_dir" ]]; then
      mkdir -p -- "$out_dir" || return
    fi
  fi

  # "$src" keeps its "./" prefix so a file name starting with "-" cannot be
  # mistaken for an option. stdin is detached so the converter can never
  # swallow the NUL-delimited file list that feeds the caller's loop.
  pdftotext -enc ASCII7 "$src" "$out" </dev/null
}

#######################################
# Walk the current directory and convert every stale or missing PDF.
# Arguments: none
# Outputs:   converted paths on stdout, warnings on stderr
# Returns:   0 if every attempted conversion succeeded, 1 otherwise
#######################################
main() {
  local src
  local status=0

  # NUL-delimited traversal keeps paths containing spaces, globs or newlines
  # intact. -type f skips directories whose name happens to end in ".pdf".
  # Scanning from "." also picks up hidden top-level entries.
  while IFS= read -r -d '' src; do
    if ! convert_if_stale "$src"; then
      printf 'warning: could not convert %s\n' "${src#./}" >&2
      status=1
    fi
  done < <(find . -type f -iname '*.pdf' -print0)

  return "$status"
}

main