Feature/AISDK-70: Add custom vocab parameter to the streaming client Python (revdotcom#41)

KostasRev · web-flow · commit 1828ac0dd42a · 2019-11-08T12:36:10.000-05:00
diff --git a/Makefile b/Makefile
@@ -51,7 +51,7 @@ clean-test: ## remove test and coverage artifacts
 	rm -fr .pytest_cache
 
 lint: ## check style with flake8
-	flake8 rev_ai tests --ignore=F401,W504,E731
+	flake8 rev_ai tests --ignore=F401,W504,E731,E123
 	flake8 rev_ai src --ignore=F401,W504
 
 test: ## run tests quickly with the default Python
diff --git a/README.md b/README.md
@@ -157,10 +157,10 @@ streaming_client = RevAiStreamingClient("ACCESS TOKEN",
 `on_error`, `on_close`, and `on_connected` are optional parameters that are functions to be called when the websocket errors, closes, and connects respectively. The default `on_error` raises the error, `on_close` prints out the code and reason for closing, and `on_connected` prints out the job ID.
 If passing in custom functions, make sure you provide the right parameters. See the sample code for the parameters.
 
-Once you have a streaming client setup with a `MediaConfig` and access token, you can obtain a transcription generator of your audio.
+Once you have a streaming client set up with a `MediaConfig` and access token, you can obtain a transcription generator of your audio. You can also use a custom vocabulary with your streaming job by supplying the optional `custom_vocabulary_id` when starting a connection.
 
 ```python
-response_generator = streaming_client.start(AUDIO_GENERATOR)
+response_generator = streaming_client.start(AUDIO_GENERATOR, custom_vocabulary_id="CUSTOM VOCAB ID")
 ```
 
 `response_generator` is a generator object that yields the transcription results of the audio including partial and final transcriptions. The `start` method creates a thread sending audio pieces from the `AUDIO_GENERATOR` to our
diff --git a/src/rev_ai/__init__.py b/src/rev_ai/__init__.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """Top-level package for rev_ai"""
 
-__version__ = '2.6.1'
+__version__ = '2.7.0'
 
 from .models import Job, JobStatus, Account, Transcript, MediaConfig, CaptionType, CustomVocabulary
diff --git a/src/rev_ai/streamingclient.py b/src/rev_ai/streamingclient.py
@@ -7,9 +7,9 @@
 from . import __version__
 
 try:
-    from urllib.parse import quote
+    from urllib.parse import urlencode
 except ImportError:
-    from urllib import quote
+    from urllib import urlencode
 
 
 def on_error(error):
@@ -62,19 +62,24 @@ def __init__(self,
         self.on_connected = on_connected
         self.client = websocket.WebSocket(enable_multithread=True)
 
-    def start(self, generator, metadata=None):
+    def start(self, generator, metadata=None, custom_vocabulary_id=None):
         """Function to connect the websocket to the URL and start the response
             thread
 
         :param generator: generator object that yields binary audio data
         :param metadata: metadata to be attached to streaming job
         """
-        url = self.base_url + '?access_token={}'.format(self.access_token) \
-            + '&content_type={}'.format(self.config.get_content_type_string()) \
-            + '&user_agent={}'.format(quote('RevAi-PythonSDK/{}'.format(__version__), safe=''))
+        url = self.base_url + '?' + urlencode({
+            'access_token': self.access_token,
+            'content_type': self.config.get_content_type_string(),
+            'user_agent': 'RevAi-PythonSDK/{}'.format(__version__)
+        })
+
+        if custom_vocabulary_id:
+            url += '&' + urlencode({'custom_vocabulary_id': custom_vocabulary_id})
 
         if metadata:
-            url += '&metadata={}'.format(quote(metadata, safe=''))
+            url += '&' + urlencode({'metadata': metadata})
 
         try:
             self.client.connect(url)
diff --git a/tests/test_streamingclient.py b/tests/test_streamingclient.py
@@ -8,9 +8,9 @@
 from src.rev_ai.streamingclient import RevAiStreamingClient
 
 try:
-    from urllib.parse import quote
+    from urllib.parse import parse_qs, urlparse
 except ImportError:
-    from urllib import quote
+    from urlparse import parse_qs, urlparse
 
 
 @pytest.mark.usefixtures('mock_streaming_client', 'mock_generator')
@@ -54,33 +54,33 @@ def test_constructor_no_token_no_config(self):
             RevAiStreamingClient(None, example_config)
 
     def test_start_success(self, mock_streaming_client, mock_generator, capsys):
+        custom_vocabulary_id = 'mycustomvocabid'
         metadata = "my metadata"
-        url = mock_streaming_client.base_url + \
-            '?access_token={}'.format(mock_streaming_client.access_token) + \
-            '&content_type={}'. \
-            format(mock_streaming_client.config.get_content_type_string()) + \
-            '&user_agent={}'.format(quote('RevAi-PythonSDK/{}'.format(__version__), safe='')) + \
-            '&metadata={}'.format(quote(metadata))
+        query_dict = {
+            'access_token': mock_streaming_client.access_token,
+            'content_type': mock_streaming_client.config.get_content_type_string(),
+            'user_agent': 'RevAi-PythonSDK/{}'.format(__version__),
+            'custom_vocabulary_id': custom_vocabulary_id,
+            'metadata': metadata
+        }
         example_data = '{"type":"partial","transcript":"Test"}'
         example_connected = '{"type":"connected","id":"testid"}'
         if six.PY3:
             example_data = example_data.encode('utf-8')
             example_connected = example_connected.encode('utf-8')
-        data = [
-            [0x1, example_connected],
-            [0x1, example_data],
-            [0x8, b'\x03\xe8End of input. Closing']
-        ]
-        exp_responses = [
-            'Connected, Job ID : testid\n',
-            '{"type":"partial","transcript":"Test"}',
-            'Connection Closed. Code : 1000; Reason : End of input. Closing\n'
-        ]
+        data = [[0x1, example_connected],
+                [0x1, example_data],
+                [0x8, b'\x03\xe8End of input. Closing']]
+        exp_responses = ['Connected, Job ID : testid\n',
+                         '{"type":"partial","transcript":"Test"}',
+                         'Connection Closed. Code : 1000; Reason : End of input. Closing\n']
         mock_streaming_client.client.recv_data.side_effect = data
 
-        response_gen = mock_streaming_client.start(mock_generator(), metadata)
+        response_gen = mock_streaming_client.start(mock_generator(), metadata, custom_vocabulary_id)
 
-        mock_streaming_client.client.connect.assert_called_once_with(url)
+        assert mock_streaming_client.client.connect.call_count == 1
+        called_url = mock_streaming_client.client.connect.call_args_list[0].args[0]
+        validate_query_parameters(called_url, query_dict)
         mock_streaming_client.client.send_binary.assert_any_call(0)
         mock_streaming_client.client.send_binary.assert_any_call(1)
         mock_streaming_client.client.send_binary.assert_any_call(2)
@@ -100,3 +100,10 @@ def test_end(self, mock_streaming_client):
         mock_streaming_client.end()
 
         mock_streaming_client.client.abort.assert_called_once_with()
+
+
+def validate_query_parameters(called_url, query_dict):
+    called_query_string = urlparse(called_url).query
+    called_query_parameters = parse_qs(called_query_string)
+    for key in called_query_parameters:
+        assert called_query_parameters[key][0] == query_dict[key]