-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecognize_pdf.sh
executable file
·33 lines (25 loc) · 1.02 KB
/
recognize_pdf.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
#
# recognize_pdf.sh - turn a scanned PDF into a searchable PDF.
#
# Usage: recognize_pdf.sh FILE.pdf
#
# Every page of FILE.pdf is rendered to a PNG with ImageMagick (`convert`),
# run through `tesseract` to produce a one-page searchable PDF, and the
# per-page PDFs are merged with Ghostscript (`gs`) into "<FILE> ocr.pdf",
# written next to the source file.
#
# Required tools: mdls (macOS), convert, tesseract, gs.
#
# Optional environment overrides:
#   OCR_RESOLUTION  rendering density in pixels per inch (default: 300)
#   OCR_LANG        tesseract language code(s)           (default: deu)

set -euo pipefail

# Deliberately NOT called LANG: LANG is the POSIX locale variable and is
# usually exported, so assigning "deu" to it would hand an invalid locale to
# every child process (convert, tesseract, gs).
readonly OCR_RESOLUTION="${OCR_RESOLUTION:-300}"
readonly OCR_LANG="${OCR_LANG:-deu}"

# Scratch directory for page images and per-page PDFs; set in main().
work_dir=""

# Print a message to stderr and abort with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory on every exit path (success or failure).
cleanup() {
  if [[ -n "$work_dir" ]]; then
    rm -rf -- "$work_dir"
  fi
}
trap cleanup EXIT

#######################################
# OCR a PDF page by page and merge the result.
# Globals:   OCR_RESOLUTION, OCR_LANG (read), work_dir (written)
# Arguments: $1 - path to the source PDF
# Outputs:   progress messages on stdout, diagnostics on stderr
# Returns:   exits non-zero on usage errors or when any tool fails
#######################################
main() {
  local source_file="${1:-}"
  if [[ -z "$source_file" ]]; then
    printf 'Usage: %s FILE.pdf\n' "${0##*/}" >&2
    exit 2
  fi
  [[ -f "$source_file" ]] || die "Source file not found: $source_file"

  local tool
  for tool in mdls convert tesseract gs; do
    command -v "$tool" >/dev/null || die "Required tool not found: $tool"
  done

  # The result is written beside the source, e.g. "scan.pdf" -> "scan ocr.pdf".
  local output_file="${source_file%.pdf} ocr.pdf"

  local pages
  pages=$(mdls -name kMDItemNumberOfPages -raw "$source_file") \
    || die "Could not read the page count of: $source_file"
  # mdls prints "(null)" when the attribute is missing; accept only a
  # positive integer so the loop below is well defined.
  if [[ ! "$pages" =~ ^[1-9][0-9]*$ ]]; then
    die "Could not determine the page count of: $source_file (got '$pages')"
  fi

  # Private, unpredictable scratch directory instead of a fixed name in the
  # current directory, so concurrent runs cannot collide.
  work_dir=$(mktemp -d "${TMPDIR:-/tmp}/recognize_pdf.XXXXXX") \
    || die "Could not create a temporary directory"
  local ocr_dir="$work_dir/OCR"
  mkdir -p "$ocr_dir"

  # Per-page PDFs are collected in page order. Merging via a glob would sort
  # them lexicographically (1, 10, 11, 2, ...) and scramble documents with
  # more than nine pages.
  local -a ocr_pages=()
  local i page_png page_base
  for (( i = 1; i <= pages; i++ )); do
    page_png="$work_dir/page$i.png"
    page_base="$ocr_dir/page$i"

    echo "Converting page $i/$pages to image."
    # ImageMagick page indices are zero-based, hence i - 1.
    convert -density "${OCR_RESOLUTION}x${OCR_RESOLUTION}" \
      -set units PixelsPerInch -depth 8 \
      "${source_file}[$((i - 1))]" "$page_png" \
      || die "convert failed on page $i"

    echo "Running OCR for languages $OCR_LANG."
    # tesseract appends ".pdf" to the output base itself.
    tesseract "$page_png" "$page_base" -l "$OCR_LANG" pdf \
      || die "tesseract failed on page $i"
    ocr_pages+=("$page_base.pdf")

    echo "---"
  done

  echo "Combining searchable PDFs into one file..."
  gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite \
    -sOutputFile="$output_file" "${ocr_pages[@]}" \
    || die "gs failed to combine the pages into: $output_file"

  echo "All done. Output file name is: $output_file"
  # echo "ALERT:Conversion completed|Output file name is: $output_file\n" # Displays alert in Platypus app runtime
}

main "$@"