-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial unit tests for the speech recognition library
- Loading branch information
1 parent
6e53b31
commit 38628d1
Showing
13 changed files
with
260 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,5 @@ pyinstaller==5.13.0 | |
torch | ||
pyperclip | ||
PyYAML | ||
numpy | ||
soundfile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Placeholder file that makes this folder a package, so that the tests in it can be discovered by `python -m unittest discover`. |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import unittest | ||
from os import path | ||
|
||
import custom_speech_recognition as sr | ||
|
||
|
||
class TestAudioFile(unittest.TestCase):
    """Tests that ``sr.AudioFile`` recordings of the bundled sample files decode as expected.

    Each test records one sample file stored next to this module and checks the
    resulting ``sr.AudioData``: its type, its sample rate, its sample width and
    the first 32 bytes of raw data, compared against values hard-coded here.
    The 24-bit tests accept two representations (a sample width of 3, or
    anything else) and carry one expected prefix for each.
    """

    def assertSimilar(self, bytes_1, bytes_2):
        """Fail unless both byte sequences have equal length and every pair of bytes differs by at most 2.

        The length check matters: ``zip`` stops at the shorter input, so without
        it an empty or truncated recording would compare as "similar" to any
        expected prefix and the test would pass vacuously.

        Raises:
            AssertionError: if the lengths differ, or if any pair of bytes at
                the same index differs by more than 2.
        """
        if len(bytes_1) != len(bytes_2):
            raise AssertionError("{} and {} have different lengths ({} != {})".format(bytes_1, bytes_2, len(bytes_1), len(bytes_2)))
        for i, (byte_1, byte_2) in enumerate(zip(bytes_1, bytes_2)):
            if abs(byte_1 - byte_2) > 2:
                raise AssertionError("{} is really different from {} at index {}".format(bytes_1, bytes_2, i))

    def _record(self, filename):
        """Record the sample file ``filename`` (stored beside this module) with a fresh recognizer and return the audio."""
        audio_path = path.join(path.dirname(path.realpath(__file__)), filename)
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return audio

    def test_get_segment(self):
        """``get_segment`` with no bounds, a start, an end, or both must slice the raw data accordingly."""
        audio = self._record("audio-mono-32-bit-44100Hz.wav")
        raw_data = audio.get_raw_data()
        # 1000 / 44100 ~= 0.0226757, so this is presumably one sample period in
        # milliseconds at 44100 Hz; for this mono file the assertions below map
        # 2 and 4 periods to byte offsets 8 and 16.
        # NOTE(review): confirm that get_segment takes milliseconds.
        sample_period = 0.022675738
        self.assertEqual(raw_data, audio.get_segment().get_raw_data())
        self.assertEqual(raw_data[8:], audio.get_segment(sample_period * 2).get_raw_data())
        self.assertEqual(raw_data[:16], audio.get_segment(None, sample_period * 4).get_raw_data())
        self.assertEqual(raw_data[8:16], audio.get_segment(sample_period * 2, sample_period * 4).get_raw_data())

    def test_wav_mono_8_bit(self):
        audio = self._record("audio-mono-8-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 1)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\xff\x00\xff\x00\xff\xff\x00\xff\x00\xff\x00\xff\x00\x00\xff\x00\x00\xff\x00\xff\x00\xff\x00\xff\x00\xff\x00\xff\x00\xff\xff")

    def test_wav_mono_16_bit(self):
        audio = self._record("audio-mono-16-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\xff\xff\x01\x00\xff\xff\x00\x00\x01\x00\xfe\xff\x01\x00\xfe\xff\x04\x00\xfc\xff\x04\x00\xfe\xff\xff\xff\x03\x00\xfe\xff")

    def test_wav_mono_24_bit(self):
        audio = self._record("audio-mono-24-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        # The expected prefix depends on the sample width the recording reports.
        if audio.sample_width == 3:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\xff\xff\x00\x01\x00\x00\xff\xff\x00\x00\x00\x00\x01\x00\x00\xfe\xff\x00\x01\x00\x00\xfe\xff\x00\x04\x00\x00\xfb")
        else:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x01\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\xfe\xff\x00\x00\x01\x00")

    def test_wav_mono_32_bit(self):
        audio = self._record("audio-mono-32-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 4)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\x00\xff\xff\x00\x00\x01\x00\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\xfe\xff\x00\x00\x01\x00")

    def test_wav_stereo_8_bit(self):
        audio = self._record("audio-stereo-8-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 1)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\xff\x00\xff\x00\x00\xff\x7f\x7f\x00\xff\x00\xff\x00\x00\xff\x00\x7f\x7f\x7f\x00\x00\xff\x00\xff\x00\xff\x00\x7f\x7f\x7f\x7f")

    def test_wav_stereo_16_bit(self):
        audio = self._record("audio-stereo-16-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x02\x00\xfb\xff\x04\x00\xfe\xff\xfe\xff\x07\x00\xf6\xff\x07\x00\xf9\xff\t\x00\xf5\xff\x0c\x00\xf8\xff\x02\x00\x04\x00\xfa\xff")

    def test_wav_stereo_24_bit(self):
        audio = self._record("audio-stereo-24-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        # The expected prefix depends on the sample width the recording reports.
        if audio.sample_width == 3:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\xfe\xff\x00\x02\x00\x00\xfe\xff\x00\x00\x00\x00\x02\x00\x00\xfc\xff\x00\x02\x00\x00\xfc\xff\x00\x08\x00\x00\xf6")
        else:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\x00\xfe\xff\x00\x00\x02\x00\x00\x00\xfe\xff\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\xfc\xff\x00\x00\x02\x00")

    def test_wav_stereo_32_bit(self):
        audio = self._record("audio-stereo-32-bit-44100Hz.wav")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 4)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\x00\xfe\xff\x00\x00\x02\x00\x00\x00\xfe\xff\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\xfc\xff\x00\x00\x02\x00")

    def test_aiff_mono_16_bit(self):
        audio = self._record("audio-mono-16-bit-44100Hz.aiff")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\xff\xff\x01\x00\xff\xff\x01\x00\xfe\xff\x02\x00\xfd\xff\x04\x00\xfc\xff\x03\x00\x00\x00\xfe\xff\x03\x00\xfd\xff")

    def test_aiff_stereo_16_bit(self):
        audio = self._record("audio-stereo-16-bit-44100Hz.aiff")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\xfe\xff\x02\x00\xfe\xff\xff\xff\x04\x00\xfa\xff\x04\x00\xfa\xff\t\x00\xf6\xff\n\x00\xfa\xff\xff\xff\x08\x00\xf5\xff")

    def test_flac_mono_16_bit(self):
        audio = self._record("audio-mono-16-bit-44100Hz.flac")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\xff\xff\x01\x00\xff\xff\x00\x00\x01\x00\xfe\xff\x02\x00\xfc\xff\x06\x00\xf9\xff\x06\x00\xfe\xff\xfe\xff\x05\x00\xfa\xff")

    def test_flac_mono_24_bit(self):
        audio = self._record("audio-mono-24-bit-44100Hz.flac")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        # The expected prefix depends on the sample width the recording reports.
        if audio.sample_width == 3:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\xff\xfe\xff\x02\x01\x00\xfd\xfe\xff\x04\x00\x00\xfc\x00\x00\x04\xfe\xff\xfb\x00\x00\x05\xfe\xff\xfc\x03\x00\x04\xfb")
        else:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\xff\xfe\xff\x00\x02\x01\x00\x00\xfd\xfe\xff\x00\x04\x00\x00\x00\xfc\x00\x00\x00\x04\xfe\xff\x00\xfb\x00\x00")

    def test_flac_stereo_16_bit(self):
        audio = self._record("audio-stereo-16-bit-44100Hz.flac")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        self.assertEqual(audio.sample_width, 2)
        self.assertSimilar(audio.get_raw_data()[:32], b"\xff\xff\xff\xff\x02\x00\xfe\xff\x00\x00\x01\x00\xfd\xff\x01\x00\xff\xff\x04\x00\xfa\xff\x05\x00\xff\xff\xfd\xff\x08\x00\xf6\xff")

    def test_flac_stereo_24_bit(self):
        audio = self._record("audio-stereo-24-bit-44100Hz.flac")
        self.assertIsInstance(audio, sr.AudioData)
        self.assertEqual(audio.sample_rate, 44100)
        # The expected prefix depends on the sample width the recording reports.
        if audio.sample_width == 3:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\xfe\xff\x00\x02\x00\x00\xfe\xff\x00\x00\x00\xff\x01\x00\x02\xfc\xff\xfe\x01\x00\x02\xfc\xff\xfe\x07\x00\x01\xf6")
        else:
            self.assertSimilar(audio.get_raw_data()[:32], b"\x00\x00\x00\x00\x00\x00\xfe\xff\x00\x00\x02\x00\x00\x00\xfe\xff\x00\x00\x00\x00\x00\xff\x01\x00\x00\x02\xfc\xff\x00\xfe\x01\x00")
|
||
|
||
# Run the tests defined above when this file is executed directly as a script.
if __name__ == "__main__": | ||
    unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#!/usr/bin/env python3 | ||
# -*- coding: utf-8 -*- | ||
|
||
import os | ||
import unittest | ||
|
||
import custom_speech_recognition as sr | ||
|
||
|
||
class TestRecognition(unittest.TestCase):
    """Transcription tests for the recognizer backends exposed by ``sr.Recognizer``.

    Every test records one of three sample files stored next to this module
    (English, French, Chinese), sends the audio to one backend and compares the
    returned transcript with a hard-coded expectation. The Wit.ai, Bing,
    Houndify and IBM tests are skipped unless their credentials are present in
    the environment.
    """

    def setUp(self):
        """Resolve the sample file paths and the keyword arguments shared by the Whisper tests."""
        here = os.path.dirname(os.path.realpath(__file__))
        self.AUDIO_FILE_EN = os.path.join(here, "english.wav")
        self.AUDIO_FILE_FR = os.path.join(here, "french.aiff")
        self.AUDIO_FILE_ZH = os.path.join(here, "chinese.flac")
        self.WHISPER_CONFIG = {"temperature": 0}

    def _record(self, audio_path):
        """Record ``audio_path`` with a fresh recognizer; return ``(recognizer, audio)``."""
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        return recognizer, audio

    # Disabled Sphinx test, kept for reference:
    # def test_sphinx_english(self):
    #     r = sr.Recognizer()
    #     with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
    #     self.assertEqual(r.recognize_sphinx(audio), "one two three")

    def test_google_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_google(audio)
        self.assertIn(transcript, ["1 2"])

    def test_google_french(self):
        recognizer, audio = self._record(self.AUDIO_FILE_FR)
        transcript = recognizer.recognize_google(audio, language="fr-FR")
        self.assertEqual(transcript, u"et c'est la dictée numéro 1")

    def test_google_chinese(self):
        recognizer, audio = self._record(self.AUDIO_FILE_ZH)
        transcript = recognizer.recognize_google(audio, language="zh-CN")
        self.assertEqual(transcript, u"砸自己的脚")

    @unittest.skipUnless("WIT_AI_KEY" in os.environ, "requires Wit.ai key to be specified in WIT_AI_KEY environment variable")
    def test_wit_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_wit(audio, key=os.environ["WIT_AI_KEY"])
        self.assertEqual(transcript, "one two three")

    @unittest.skipUnless("BING_KEY" in os.environ, "requires Microsoft Bing Voice Recognition key to be specified in BING_KEY environment variable")
    def test_bing_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_bing(audio, key=os.environ["BING_KEY"])
        self.assertEqual(transcript, "123.")

    @unittest.skipUnless("BING_KEY" in os.environ, "requires Microsoft Bing Voice Recognition key to be specified in BING_KEY environment variable")
    def test_bing_french(self):
        recognizer, audio = self._record(self.AUDIO_FILE_FR)
        transcript = recognizer.recognize_bing(audio, key=os.environ["BING_KEY"], language="fr-FR")
        self.assertEqual(transcript, u"Essaye la dictée numéro un.")

    @unittest.skipUnless("BING_KEY" in os.environ, "requires Microsoft Bing Voice Recognition key to be specified in BING_KEY environment variable")
    def test_bing_chinese(self):
        recognizer, audio = self._record(self.AUDIO_FILE_ZH)
        transcript = recognizer.recognize_bing(audio, key=os.environ["BING_KEY"], language="zh-CN")
        self.assertEqual(transcript, u"砸自己的脚。")

    @unittest.skipUnless("HOUNDIFY_CLIENT_ID" in os.environ and "HOUNDIFY_CLIENT_KEY" in os.environ, "requires Houndify client ID and client key to be specified in HOUNDIFY_CLIENT_ID and HOUNDIFY_CLIENT_KEY environment variables")
    def test_houndify_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_houndify(
            audio,
            client_id=os.environ["HOUNDIFY_CLIENT_ID"],
            client_key=os.environ["HOUNDIFY_CLIENT_KEY"],
        )
        self.assertEqual(transcript, "one two three")

    @unittest.skipUnless("IBM_USERNAME" in os.environ and "IBM_PASSWORD" in os.environ, "requires IBM Speech to Text username and password to be specified in IBM_USERNAME and IBM_PASSWORD environment variables")
    def test_ibm_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_ibm(
            audio,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"],
        )
        self.assertEqual(transcript, "one two three ")

    @unittest.skipUnless("IBM_USERNAME" in os.environ and "IBM_PASSWORD" in os.environ, "requires IBM Speech to Text username and password to be specified in IBM_USERNAME and IBM_PASSWORD environment variables")
    def test_ibm_french(self):
        recognizer, audio = self._record(self.AUDIO_FILE_FR)
        transcript = recognizer.recognize_ibm(
            audio,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"],
            language="fr-FR",
        )
        self.assertEqual(transcript, u"si la dictée numéro un ")

    @unittest.skipUnless("IBM_USERNAME" in os.environ and "IBM_PASSWORD" in os.environ, "requires IBM Speech to Text username and password to be specified in IBM_USERNAME and IBM_PASSWORD environment variables")
    def test_ibm_chinese(self):
        recognizer, audio = self._record(self.AUDIO_FILE_ZH)
        transcript = recognizer.recognize_ibm(
            audio,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"],
            language="zh-CN",
        )
        self.assertEqual(transcript, u"砸 自己 的 脚 ")

    def test_whisper_english(self):
        recognizer, audio = self._record(self.AUDIO_FILE_EN)
        transcript = recognizer.recognize_whisper(audio, language="english", **self.WHISPER_CONFIG)
        self.assertEqual(transcript, " 1, 2, 3.")

    def test_whisper_french(self):
        recognizer, audio = self._record(self.AUDIO_FILE_FR)
        transcript = recognizer.recognize_whisper(audio, language="french", **self.WHISPER_CONFIG)
        self.assertEqual(transcript, " et c'est la dictée numéro 1.")

    def test_whisper_chinese(self):
        recognizer, audio = self._record(self.AUDIO_FILE_ZH)
        transcript = recognizer.recognize_whisper(audio, model="small", language="chinese", **self.WHISPER_CONFIG)
        self.assertEqual(transcript, u"砸自己的腳")
|
||
# Run the tests defined above when this file is executed directly as a script.
if __name__ == "__main__": | ||
    unittest.main() |