-
Notifications
You must be signed in to change notification settings - Fork 23
/
pytesseract_ocr.py
43 lines (32 loc) · 1.39 KB
/
pytesseract_ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Extract the text from every image in a folder with Tesseract OCR
# and store all of it in a single output file.
from PIL import Image
import pytesseract
import os
# Point pytesseract at the Tesseract executable instead of relying on PATH.
# NOTE(review): this is a raw string, so every `\\` below stays as two
# backslashes in the resulting path. Windows normally tolerates repeated
# separators, but confirm on the target machine; single backslashes would
# be the conventional spelling.
pytesseract.pytesseract.tesseract_cmd = (
    r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
)
def main(path=r"C:\Users\user\Desktop\ANPR System\images", fullTempPath=None):
    """Run OCR on every image in a folder and collect the text in one file.

    For each image found directly inside ``path`` the image's file name and
    the text recognised by Tesseract (English language data) are appended to
    the output file. Afterwards the whole output file is printed to stdout.

    Args:
        path: Folder that holds the raw images.
        fullTempPath: File the results are appended to. Defaults to
            ``outputFile.txt`` inside ``path``.

    Raises:
        OSError: If ``path`` cannot be listed or the output file cannot be
            opened.
    """
    import sys  # local import: only needed to report skipped files

    if fullTempPath is None:
        fullTempPath = os.path.join(path, "outputFile.txt")
    outputKey = os.path.normcase(os.path.abspath(fullTempPath))

    # List the folder before the output file is opened, and sort the names
    # so that the order of the entries in the output file is deterministic.
    imageNames = sorted(os.listdir(path))

    # "a" creates the file if it is missing and appends otherwise. The file
    # is opened once for the whole run instead of once per image, and an
    # explicit encoding keeps non-ASCII OCR output from failing on platforms
    # whose default encoding is narrower.
    with open(fullTempPath, "a", encoding="utf-8") as outFile:
        for imageName in imageNames:
            inputPath = os.path.join(path, imageName)

            # Sub-directories are not images.
            if not os.path.isfile(inputPath):
                continue
            # The output file usually lives in the scanned folder; feeding
            # it back into the OCR step would fail on every run but the
            # first.
            if os.path.normcase(os.path.abspath(inputPath)) == outputKey:
                continue

            # Only the open call is guarded: files Pillow cannot identify
            # are reported and skipped, while OCR errors (for example a
            # missing Tesseract binary) still propagate.
            try:
                img = Image.open(inputPath)
            except OSError as err:
                print("skipping {}: {}".format(inputPath, err), file=sys.stderr)
                continue

            with img:
                # applying OCR using pytesseract
                text = pytesseract.image_to_string(img, lang="eng")

            # name of the image, followed by the text found in it
            outFile.write(imageName + "\n")
            outFile.write(text + "\n")

    # Print the accumulated output file. errors="replace" keeps results
    # written by earlier runs with a different encoding readable.
    with open(fullTempPath, "r", encoding="utf-8", errors="replace") as resultFile:
        print(resultFile.read())
# Run the OCR pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()