-
Notifications
You must be signed in to change notification settings - Fork 0
/
s3_textract_functions.py
executable file
·44 lines (33 loc) · 1.07 KB
/
s3_textract_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import boto3
import time
# Pause, in seconds, passed to time.sleep() before each job-status poll in isJobComplete.
sleep_time = 5
def StartDocumentTextDetection(s3BucketName, objectName):
    """Start a Textract text-detection job for a document stored in S3.

    Args:
        s3BucketName: Name of the S3 bucket that holds the document.
        objectName: Name (key) of the document object inside that bucket.

    Returns:
        The "JobId" value taken from the start_document_text_detection
        response.
    """
    client = boto3.client('textract')
    response = client.start_document_text_detection(
        DocumentLocation={
            'S3Object': {
                'Bucket': s3BucketName,
                'Name': objectName,
            },
        },
    )
    return response["JobId"]
def isJobComplete(jobId):
    """Poll a Textract text-detection job until it leaves "IN_PROGRESS".

    Sleeps for ``sleep_time`` seconds before every status request and prints
    each status it observes.

    Args:
        jobId: Identifier of the job to poll.

    Returns:
        The first "JobStatus" value seen that is not "IN_PROGRESS".
    """
    # The first pause happens before the client is created, then once more
    # ahead of every further poll.
    time.sleep(sleep_time)
    textract = boto3.client('textract')
    while True:
        job_status = textract.get_document_text_detection(JobId=jobId)["JobStatus"]
        print("Job status: {}".format(job_status))
        if job_status != "IN_PROGRESS":
            return job_status
        time.sleep(sleep_time)
def getDocumentTextDetection(jobId):
    """Wait for a Textract text-detection job to finish and fetch its result.

    Blocks in isJobComplete() until the job is no longer "IN_PROGRESS", then
    issues one get_document_text_detection request and returns that response
    unchanged.

    Args:
        jobId: Identifier of the job whose result should be fetched.

    Returns:
        The response of a single get_document_text_detection call. The final
        job status is not checked here; callers can inspect
        response["JobStatus"] themselves.

    NOTE(review): only one request is made, so if the service paginates the
    result (NextToken), later pages are not fetched -- confirm whether callers
    need them.
    """
    isJobComplete(jobId)
    client = boto3.client('textract')
    return client.get_document_text_detection(JobId=jobId)