Skip to content

Commit 1828ac0

Browse files
authored
Feature/AISDK-70: Add custom vocab parameter to the streaming client Python (revdotcom#41)
1 parent 6428ab9 commit 1828ac0

File tree

5 files changed

+43
-31
lines changed

5 files changed

+43
-31
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
5151
rm -fr .pytest_cache
5252

5353
lint: ## check style with flake8
54-
flake8 rev_ai tests --ignore=F401,W504,E731
54+
flake8 rev_ai tests --ignore=F401,W504,E731,E123
5555
flake8 rev_ai src --ignore=F401,W504
5656

5757
test: ## run tests quickly with the default Python

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,10 +157,10 @@ streaming_client = RevAiStreamingClient("ACCESS TOKEN",
157157
`on_error`, `on_close`, and `on_connected` are optional parameters that are functions to be called when the websocket errors, closes, and connects respectively. The default `on_error` raises the error, `on_close` prints out the code and reason for closing, and `on_connected` prints out the job ID.
158158
If passing in custom functions, make sure you provide the right parameters. See the sample code for the parameters.
159159

160-
Once you have a streaming client setup with a `MediaConfig` and access token, you can obtain a transcription generator of your audio.
160+
Once you have a streaming client set up with a `MediaConfig` and an access token, you can obtain a transcription generator of your audio. You can also use a custom vocabulary with your streaming job by supplying the optional `custom_vocabulary_id` when starting a connection!
161161

162162
```python
163-
response_generator = streaming_client.start(AUDIO_GENERATOR)
163+
response_generator = streaming_client.start(AUDIO_GENERATOR, custom_vocabulary_id="CUSTOM VOCAB ID")
164164
```
165165

166166
`response_generator` is a generator object that yields the transcription results of the audio including partial and final transcriptions. The `start` method creates a thread sending audio pieces from the `AUDIO_GENERATOR` to our

src/rev_ai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""Top-level package for rev_ai"""
33

4-
__version__ = '2.6.1'
4+
__version__ = '2.7.0'
55

66
from .models import Job, JobStatus, Account, Transcript, MediaConfig, CaptionType, CustomVocabulary

src/rev_ai/streamingclient.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
from . import __version__
88

99
try:
10-
from urllib.parse import quote
10+
from urllib.parse import urlencode
1111
except ImportError:
12-
from urllib import quote
12+
from urllib import urlencode
1313

1414

1515
def on_error(error):
@@ -62,19 +62,24 @@ def __init__(self,
6262
self.on_connected = on_connected
6363
self.client = websocket.WebSocket(enable_multithread=True)
6464

65-
def start(self, generator, metadata=None):
65+
def start(self, generator, metadata=None, custom_vocabulary_id=None):
6666
"""Function to connect the websocket to the URL and start the response
6767
thread
6868
6969
:param generator: generator object that yields binary audio data
7070
:param metadata: metadata to be attached to streaming job
7171
"""
72-
url = self.base_url + '?access_token={}'.format(self.access_token) \
73-
+ '&content_type={}'.format(self.config.get_content_type_string()) \
74-
+ '&user_agent={}'.format(quote('RevAi-PythonSDK/{}'.format(__version__), safe=''))
72+
url = self.base_url + '?' + urlencode({
73+
'access_token': self.access_token,
74+
'content_type': self.config.get_content_type_string(),
75+
'user_agent': 'RevAi-PythonSDK/{}'.format(__version__)
76+
})
77+
78+
if custom_vocabulary_id:
79+
url += '&' + urlencode({'custom_vocabulary_id': custom_vocabulary_id})
7580

7681
if metadata:
77-
url += '&metadata={}'.format(quote(metadata, safe=''))
82+
url += '&' + urlencode({'metadata': metadata})
7883

7984
try:
8085
self.client.connect(url)

tests/test_streamingclient.py

Lines changed: 27 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
from src.rev_ai.streamingclient import RevAiStreamingClient
99

1010
try:
11-
from urllib.parse import quote
11+
from urllib.parse import parse_qs, urlparse
1212
except ImportError:
13-
from urllib import quote
13+
from urlparse import parse_qs, urlparse
1414

1515

1616
@pytest.mark.usefixtures('mock_streaming_client', 'mock_generator')
@@ -54,33 +54,33 @@ def test_constructor_no_token_no_config(self):
5454
RevAiStreamingClient(None, example_config)
5555

5656
def test_start_success(self, mock_streaming_client, mock_generator, capsys):
57+
custom_vocabulary_id = 'mycustomvocabid'
5758
metadata = "my metadata"
58-
url = mock_streaming_client.base_url + \
59-
'?access_token={}'.format(mock_streaming_client.access_token) + \
60-
'&content_type={}'. \
61-
format(mock_streaming_client.config.get_content_type_string()) + \
62-
'&user_agent={}'.format(quote('RevAi-PythonSDK/{}'.format(__version__), safe='')) + \
63-
'&metadata={}'.format(quote(metadata))
59+
query_dict = {
60+
'access_token': mock_streaming_client.access_token,
61+
'content_type': mock_streaming_client.config.get_content_type_string(),
62+
'user_agent': 'RevAi-PythonSDK/{}'.format(__version__),
63+
'custom_vocabulary_id': custom_vocabulary_id,
64+
'metadata': metadata
65+
}
6466
example_data = '{"type":"partial","transcript":"Test"}'
6567
example_connected = '{"type":"connected","id":"testid"}'
6668
if six.PY3:
6769
example_data = example_data.encode('utf-8')
6870
example_connected = example_connected.encode('utf-8')
69-
data = [
70-
[0x1, example_connected],
71-
[0x1, example_data],
72-
[0x8, b'\x03\xe8End of input. Closing']
73-
]
74-
exp_responses = [
75-
'Connected, Job ID : testid\n',
76-
'{"type":"partial","transcript":"Test"}',
77-
'Connection Closed. Code : 1000; Reason : End of input. Closing\n'
78-
]
71+
data = [[0x1, example_connected],
72+
[0x1, example_data],
73+
[0x8, b'\x03\xe8End of input. Closing']]
74+
exp_responses = ['Connected, Job ID : testid\n',
75+
'{"type":"partial","transcript":"Test"}',
76+
'Connection Closed. Code : 1000; Reason : End of input. Closing\n']
7977
mock_streaming_client.client.recv_data.side_effect = data
8078

81-
response_gen = mock_streaming_client.start(mock_generator(), metadata)
79+
response_gen = mock_streaming_client.start(mock_generator(), metadata, custom_vocabulary_id)
8280

83-
mock_streaming_client.client.connect.assert_called_once_with(url)
81+
assert mock_streaming_client.client.connect.call_count == 1
82+
called_url = mock_streaming_client.client.connect.call_args_list[0].args[0]
83+
validate_query_parameters(called_url, query_dict)
8484
mock_streaming_client.client.send_binary.assert_any_call(0)
8585
mock_streaming_client.client.send_binary.assert_any_call(1)
8686
mock_streaming_client.client.send_binary.assert_any_call(2)
@@ -100,3 +100,10 @@ def test_end(self, mock_streaming_client):
100100
mock_streaming_client.end()
101101

102102
mock_streaming_client.client.abort.assert_called_once_with()
103+
104+
105+
def validate_query_parameters(called_url, query_dict):
    """Assert that the query string of ``called_url`` matches ``query_dict``.

    The comparison is exact in both directions: every expected parameter must
    be present with the expected decoded value, and the URL must not carry
    any parameter that is not listed in ``query_dict``.

    :param called_url: full URL whose query string is checked
    :param query_dict: mapping of expected parameter names to their decoded
        string values
    :raises AssertionError: if a parameter is missing, unexpected, repeated,
        or has a different value
    """
    called_query_string = urlparse(called_url).query
    # keep_blank_values stops parse_qs from silently dropping empty
    # parameters, which would otherwise hide them from the comparison.
    called_query_parameters = parse_qs(called_query_string, keep_blank_values=True)
    # parse_qs maps each name to a list of values; expect exactly one apiece.
    expected_query_parameters = {key: [value] for key, value in query_dict.items()}
    assert called_query_parameters == expected_query_parameters

0 commit comments

Comments
 (0)