-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathocr.sh
More file actions
executable file
·50 lines (44 loc) · 1.85 KB
/
ocr.sh
File metadata and controls
executable file
·50 lines (44 loc) · 1.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env bash
# ocr.sh - select a screen region, run tesseract OCR on it and open the
# recognised text in $EDITOR inside $TERMINAL.
#
# Requires: tesseract, dmenu, maim, ImageMagick (convert), awk, GNU grep (-P).
# Environment:
#   TERMINAL, EDITOR, SHELL  programs used to show the result
#   OCR_MAX_COMBO_LANGS      largest number of installed languages for which
#                            every "a+b+c" combination is offered (default 16)
# Exit status: 0 on success, 1 on any error (messages go to stderr).
#
# The script uses [[ ]], (( )) and arrays, so it must run under bash rather
# than a plain POSIX /bin/sh.
# see https://github.com/tesseract-ocr/tesseract for more info

# Scratch directory holding the screenshot and the text file.
# Global (not local to main) because the EXIT trap reads it after main returns.
ocr_work_dir=''

# Remove the scratch directory, if one was created.
cleanup() {
  if [[ -n "$ocr_work_dir" ]]; then
    rm -rf -- "$ocr_work_dir"
  fi
}

# Print the arguments as one line on stderr.
err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Turn a list of language codes into menu entries.
# Reads one language code per line on stdin (blank lines are ignored) and
# prints every non-empty combination joined with "+", e.g. for "eng" and
# "deu": eng, deu, eng+deu.
# Idea taken from: https://stackoverflow.com/questions/68178062/how-to-generate-all-possible-combinations-of-lines-in-a-file-using-bash
# Arguments:
#   $1 - maximum number of languages to combine (default 16). The number of
#        combinations doubles with every language, so above this limit only
#        the single languages are printed.
# Outputs: one menu entry per line on stdout.
#######################################
list_language_choices() {
  # Bit tests use int(mask / 2^bit) % 2 instead of and(), which only gawk has.
  awk -v limit="${1:-16}" '
    NF { langs[++count] = $1 }
    END {
      if (count > limit) {
        for (k = 1; k <= count; k++) print langs[k]
        exit
      }
      total = 2 ^ count
      # start at 1: mask 0 is the empty combination and would be a blank entry
      for (mask = 1; mask < total; mask++) {
        combo = ""
        for (bit = 0; bit < count; bit++) {
          if (int(mask / 2 ^ bit) % 2) {
            combo = (combo == "" ? "" : combo "+") langs[bit + 1]
          }
        }
        print combo
      }
    }'
}

#######################################
# Decide whether a brightness value counts as dark.
# Arguments:
#   $1 - brightness as a non-negative integer
#   $2 - threshold (default 50)
# Returns: 0 if $1 is numerically below the threshold, 1 otherwise.
#          A missing or non-numeric $1 is treated as "not dark".
#######################################
is_dark() {
  local level=$1 threshold=${2:-50}
  [[ "$level" =~ ^[0-9]+$ ]] || return 1
  # 10# forces base 10 so a value such as 08 is not parsed as octal
  ((10#$level < threshold))
}

#######################################
# Entry point: pick language(s), capture a region, OCR it, open the result.
# Globals:   ocr_work_dir (written), TERMINAL, EDITOR, SHELL,
#            OCR_MAX_COMBO_LANGS (read)
# Returns:   0 on success, 1 on error
#######################################
main() {
  local tool var language image text_file brightness output
  local -a words=()

  for tool in tesseract dmenu maim convert; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      err "error: required program not found: $tool"
      return 1
    fi
  done
  for var in TERMINAL EDITOR SHELL; do
    if [[ -z "${!var:-}" ]]; then
      err "error: \$$var is not set"
      return 1
    fi
  done

  # get list of supported languages and choose it by dmenu
  # (sed drops the first line of the --list-langs output)
  language=$(tesseract --list-langs | sed '1d' |
    list_language_choices "${OCR_MAX_COMBO_LANGS:-16}" |
    dmenu -p "languages: ")
  # single-language alternative:
  # language=$(tesseract --list-langs | sed '1d' | dmenu -p "language: ")
  if [[ -z "$language" ]]; then
    err "error: no input provided"
    return 1
  fi

  # private scratch directory instead of fixed names in /tmp
  if ! ocr_work_dir=$(mktemp -d); then
    err "error: cannot create temporary directory"
    return 1
  fi
  trap cleanup EXIT
  image=$ocr_work_dir/ocr.png
  text_file=$ocr_work_dir/ocr.txt

  # capturing the image and temporarily saving it
  if ! maim --quiet --select "$image"; then
    err "error: selection was cancelled"
    return 1
  fi

  # checking for brightness by compressing grayscale version of captured image to 1x1
  # tesseract works better with black text on white background
  # The number is read from the colour name at the end of the txt: line;
  # [a-z]\( accepts names with and without a trailing "a" before the "(".
  brightness=$(convert "$image" -colorspace gray -resize 1x1 txt:- |
    grep -Po '[a-z]\(\K[0-9]+' | head -n 1)
  if is_dark "$brightness"; then
    # convert image to negative if it's dark and pass it to tesseract
    output=$(convert "$image" -channel RGB -negate - |
      tesseract - stdout -l "$language" --oem 1)
  else
    # else pass to tesseract as is
    output=$(tesseract "$image" stdout -l "$language" --oem 1)
  fi
  if [[ -z "$output" ]]; then
    err "error: no text was recognised"
    return 1
  fi

  # Store the text as single-space separated words. read -a splits on
  # whitespace without glob-expanding the words, and %s writes them
  # literally (no backslash-escape processing of OCR text).
  # read returns non-zero at end of input, hence the "|| true".
  read -r -d '' -a words <<<"$output" || true
  printf '%s ' "${words[@]}" >"$text_file"

  # open the text file in your editor of choice
  # $TERMINAL and $EDITOR are expanded unquoted/inline on purpose so they may
  # carry their own arguments.
  # shellcheck disable=SC2086
  $TERMINAL -e "$SHELL" -c "$EDITOR '$text_file'"
  # $TERMINAL --title="Tesseract OCR" -e $SHELL -c "$EDITOR /tmp/ocr.txt"
  #           ^
  #           works only for Alacritty

  # the scratch directory (image and text file) is removed by the EXIT trap
}

main "$@"