-
Notifications
You must be signed in to change notification settings - Fork 0
/
transcribe_audio.py
32 lines (28 loc) · 1.16 KB
/
transcribe_audio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os.path
import re
import urllib.parse

import boto3
# Configuration is read from the environment when the module is imported;
# a missing variable raises KeyError at import time.

# Name of the bucket that holds the audio objects to be transcribed.
RAW_AUDIO_BUCKET_NAME = os.environ['RAW_AUDIO_BUCKET_NAME']
# Name of the bucket passed to Transcribe as the output bucket.
TRANSCRIBED_AUDIO_BUCKET_NAME = os.environ['TRANSCRIBED_AUDIO_BUCKET_NAME']
# Language code passed to Transcribe for every job started by this module.
AUDIO_LANGUAGE_CODE = os.environ['AUDIO_LANGUAGE_CODE']
# Created once at module level and used by every call made through this module.
transcribe_client = boto3.client('transcribe')
# Amazon Transcribe rejects job names outside [0-9a-zA-Z._-] (max 200 chars)
# and output keys outside [a-zA-Z0-9-_.!*'()/]; S3 object keys routinely
# contain other characters such as '/' or spaces.
_JOB_NAME_INVALID_CHARS = re.compile(r'[^0-9a-zA-Z._-]')
_OUTPUT_KEY_INVALID_CHARS = re.compile(r"[^a-zA-Z0-9\-_.!*'()/]")
_JOB_NAME_MAX_LENGTH = 200


def _transcription_job_name(object_key):
    """Return *object_key* reduced to a name Transcribe accepts for a job."""
    sanitized = _JOB_NAME_INVALID_CHARS.sub('_', object_key)
    return sanitized[:_JOB_NAME_MAX_LENGTH]


def _transcript_output_key(object_key):
    """Return the ``.json`` key for the transcript of *object_key*."""
    # splitext swaps only the final extension; str.replace would also
    # rewrite any ".mp3" that appears earlier in the key.
    stem, _extension = os.path.splitext(object_key)
    return _OUTPUT_KEY_INVALID_CHARS.sub('_', stem + '.json')


def transcribe_audio(event):
    """Start a Transcribe job for the object named in an S3 event.

    Only the first entry of ``event['Records']`` is processed. Its object key
    is URL-decoded (``+`` becomes a space) and looked up in
    ``RAW_AUDIO_BUCKET_NAME``. The transcript is requested under the same key
    with its extension replaced by ``.json`` in
    ``TRANSCRIBED_AUDIO_BUCKET_NAME``.

    The call only *starts* the job; it does not wait for it to finish.

    Args:
        event: Mapping that provides ``Records[0]['s3']['object']['key']``.

    Raises:
        KeyError, IndexError: If ``event`` does not contain that path.
        Exception: Whatever ``start_transcription_job`` raises is propagated.
    """
    object_key = urllib.parse.unquote_plus(
        event['Records'][0]['s3']['object']['key']
    )
    # An s3:// URI is independent of region and needs no URL-encoding of the
    # key, unlike a hand-built https://s3.amazonaws.com/... address.
    media_uri = 's3://{0}/{1}'.format(RAW_AUDIO_BUCKET_NAME, object_key)
    print("Audio to transcribe: " + object_key)

    transcribe_client.start_transcription_job(
        TranscriptionJobName=_transcription_job_name(object_key),
        LanguageCode=AUDIO_LANGUAGE_CODE,
        MediaFormat='mp3',
        Media={
            'MediaFileUri': media_uri,
        },
        OutputKey=_transcript_output_key(object_key),
        OutputBucketName=TRANSCRIBED_AUDIO_BUCKET_NAME,
    )
    # The job runs asynchronously, so at this point it has only been started.
    print(
        "Started transcription job; transcript will be written to bucket: "
        + TRANSCRIBED_AUDIO_BUCKET_NAME
    )
def lambda_handler(event, context):
    """Lambda entry point: hand the incoming event to ``transcribe_audio``.

    Args:
        event: Invocation payload, forwarded unchanged.
        context: Runtime context object; not used.

    Any ``Exception`` raised while handling the event is printed and then
    swallowed, so this function always returns ``None`` and the failure is
    not propagated to the caller.
    """
    try:
        transcribe_audio(event)
    except Exception as error:
        # Emits the fixed message and the exception text on separate lines.
        print('Exception when transcribing audio', error, sep='\n')