-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathspeech_to_text.py
123 lines (101 loc) · 4.04 KB
/
speech_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# -*- coding: utf-8 -*-
import requests
import telebot
import subprocess
import tempfile
import os
import xml.etree.ElementTree as XmlElementTree
import httplib2
import uuid
# NOTE(review): an API key committed to source control should be treated as
# compromised and rotated. The YANDEX_API_KEY environment variable, when set
# and non-empty, overrides the built-in fallback below.
YANDEX_API_KEY = os.environ.get('YANDEX_API_KEY') or '92b2f711-9cb2-4f1b-b23e-770a21e225a2'
# Host and request path of the speech recognition endpoint.
YANDEX_ASR_HOST = 'asr.yandex.net'
YANDEX_ASR_PATH = '/asr_xml'
# Chunk size, in bytes, used when splitting audio for upload (1 MiB).
CHUNK_SIZE = 1024 ** 2
def convert_to_pcm16b16000r(in_filename=None, in_bytes=None):
    """Convert audio to raw signed 16-bit little-endian PCM at 16000 Hz via FFmpeg.

    The input is taken from ``in_bytes`` when it is non-empty (it is first
    written to a temporary file that FFmpeg reads); otherwise the file at
    ``in_filename`` is used.

    Args:
        in_filename: Path of the audio file to convert.
        in_bytes: Audio content to convert; takes precedence over
            ``in_filename`` when non-empty.

    Returns:
        The bytes FFmpeg wrote to its standard output. FFmpeg's exit status
        is not checked, so a failed conversion returns whatever (possibly
        empty) output was produced.

    Raises:
        Exception: If neither ``in_filename`` nor ``in_bytes`` is given.
    """
    # Validate before any temporary file is created.
    if not in_bytes and not in_filename:
        raise Exception('Neither input file name nor input bytes is specified.')

    temp_in_path = None
    try:
        if in_bytes:
            with tempfile.NamedTemporaryFile(delete=False) as temp_in_file:
                # Record the name first so the file is cleaned up even if
                # the write below fails.
                temp_in_path = temp_in_file.name
                temp_in_file.write(in_bytes)
            in_filename = temp_in_path

        # Command line used to invoke FFmpeg; '-' sends the output to stdout.
        command = [
            r'/usr/bin/ffmpeg',
            '-i', in_filename,
            '-f', 's16le',
            '-acodec', 'pcm_s16le',
            '-ar', '16000',
            '-'
        ]

        with tempfile.TemporaryFile() as temp_out_file:
            subprocess.run(command, stdout=temp_out_file, stderr=subprocess.DEVNULL)
            temp_out_file.seek(0)
            return temp_out_file.read()
    finally:
        # Remove the temporary input file even when FFmpeg could not be
        # started or reading the output failed.
        if temp_in_path is not None:
            os.remove(temp_in_path)
def read_chunks(chunk_size, bytes):
    """Yield consecutive slices of ``bytes``, each at most ``chunk_size`` long.

    Args:
        chunk_size: Maximum length of each yielded slice; must be positive.
        bytes: The sequence to split (any sliceable object with ``len``).

    Yields:
        Slices of ``bytes`` in order. For empty input a single empty slice
        is yielded, which keeps the historical behaviour of this function.

    Raises:
        ValueError: If ``chunk_size`` is not positive (raised when iteration
            starts, since this is a generator).
    """
    if chunk_size <= 0:
        # A non-positive size would never consume the input.
        raise ValueError('chunk_size must be a positive integer.')

    total = len(bytes)
    if not total:
        yield bytes[:chunk_size]
        return

    # Slice by offset instead of repeatedly re-slicing the remainder, which
    # would copy the whole tail on every iteration.
    for start in range(0, total, chunk_size):
        yield bytes[start:start + chunk_size]
def speech_to_text(filename=None, bytes=None, request_id=uuid.uuid4().hex, topic='notes', lang='ru-RU',
                   key=YANDEX_API_KEY):
    """Recognize speech in an audio file or byte string via the ASR HTTP endpoint.

    The audio is converted with ``convert_to_pcm16b16000r``, uploaded to
    ``YANDEX_ASR_HOST`` with chunked transfer encoding, and the XML reply is
    scanned for the child element with the highest ``confidence`` attribute.

    Args:
        filename: Path of an audio file to read; when given, its content
            replaces ``bytes``.
        bytes: Audio content to recognize.
        request_id: Value sent as the ``uuid`` query parameter.
        topic: Value sent as the ``topic`` query parameter.
        lang: Value sent as the ``lang`` query parameter.
        key: Value sent as the ``key`` query parameter.

    Returns:
        The text of the child element with the highest confidence.

    Raises:
        Exception: If neither ``filename`` nor ``bytes`` provides any data.
        SpeechException: If the response status is not 200, the reply's
            ``success`` attribute is not 1, or the reply has no child
            elements.
    """
    # NOTE: the ``request_id`` default is evaluated once, when this function
    # is defined, so every call that omits it sends the same value.

    # A file name, when given, takes precedence over ``bytes``.
    if filename:
        with open(filename, 'br') as audio_file:
            bytes = audio_file.read()
    if not bytes:
        raise Exception('Neither file name nor bytes provided.')

    # Convert to the format announced in the Content-Type header below.
    bytes = convert_to_pcm16b16000r(in_bytes=bytes)

    # Build the request path with its query string.
    url = YANDEX_ASR_PATH + '?uuid=%s&key=%s&topic=%s&lang=%s' % (
        request_id,
        key,
        topic,
        lang
    )

    # Open the connection and send the request; always close the connection,
    # even when sending or reading fails.
    connection = httplib2.HTTPConnectionWithTimeout(YANDEX_ASR_HOST)
    try:
        connection.connect()
        connection.putrequest('POST', url)
        connection.putheader('Transfer-Encoding', 'chunked')
        connection.putheader('Content-Type', 'audio/x-pcm;bit=16;rate=16000')
        connection.endheaders()

        # Send the audio as HTTP chunks: hex length, CRLF, data, CRLF.
        for chunk in read_chunks(CHUNK_SIZE, bytes):
            if not chunk:
                # A zero-length chunk is the end-of-body marker in chunked
                # encoding; it must only be sent once, below.
                continue
            connection.send(('%x\r\n' % len(chunk)).encode())
            connection.send(chunk)
            connection.send(b'\r\n')

        connection.send(b'0\r\n\r\n')
        response = connection.getresponse()
        status = response.code
        # Read the body before the connection is closed.
        response_text = response.read()
    finally:
        connection.close()

    # Handle the server response.
    if status != 200:
        raise SpeechException('Unknown error.\nCode: %s\n\n%s' % (status, response_text))

    xml = XmlElementTree.fromstring(response_text)
    if int(xml.attrib['success']) != 1:
        raise SpeechException('No text found.\n\nResponse:\n%s' % (response_text))

    # Pick the child with the strictly highest confidence (first wins ties).
    no_confidence = float('-inf')
    best_confidence = no_confidence
    best_text = ''
    for variant in xml:
        confidence = float(variant.attrib['confidence'])
        if confidence > best_confidence:
            best_text = variant.text
            best_confidence = confidence

    if best_confidence == no_confidence:
        # A dedicated exception type lets callers handle recognition
        # failures separately from other errors.
        raise SpeechException('No text found.\n\nResponse:\n%s' % (response_text))
    return best_text
# Custom exception type for speech recognition failures
class SpeechException(Exception):
    """Error raised when speech recognition does not produce a result."""