From 73ad03c7b6cd83f608af79168f77b889fe3ac64a Mon Sep 17 00:00:00 2001 From: Matheus Lima Date: Tue, 12 Feb 2019 20:50:39 -0300 Subject: [PATCH] Created endpoint to communicate with the Google STT API --- api/account/Pipfile.lock | 21 ---------------- api/public/Pipfile | 1 + api/public/Pipfile.lock | 9 ++++++- api/public/api.py | 2 ++ api/public/endpoints/google_stt.py | 39 ++++++++++++++++++++++++++++++ 5 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 api/public/endpoints/google_stt.py diff --git a/api/account/Pipfile.lock b/api/account/Pipfile.lock index b7217f3c..11ca22e3 100644 --- a/api/account/Pipfile.lock +++ b/api/account/Pipfile.lock @@ -102,13 +102,6 @@ } }, "develop": { - "aniso8601": { - "hashes": [ - "sha256:03c0ffeeb04edeca1ed59684cc6836dc377f58e52e315dc7be3af879909889f4", - "sha256:ac30cceff24aec920c37b8d74d7d8a5dd37b1f62a90b4f268a6234cabe147080" - ], - "version": "==4.1.0" - }, "behave": { "hashes": [ "sha256:b9662327aa53294c1351b0a9c369093ccec1d21026f050c3bd9b3e5cccf81a86", @@ -153,13 +146,6 @@ "index": "pypi", "version": "==1.0.2" }, - "flask-restful": { - "hashes": [ - "sha256:ecd620c5cc29f663627f99e04f17d1f16d095c83dc1d618426e2ad68b03092f8", - "sha256:f8240ec12349afe8df1db168ea7c336c4e5b0271a36982bff7394f93275f2ca9" - ], - "version": "==0.3.7" - }, "idna": { "hashes": [ "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", @@ -297,13 +283,6 @@ ], "version": "==1.7.1" }, - "pytz": { - "hashes": [ - "sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9", - "sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c" - ], - "version": "==2018.9" - }, "requests": { "hashes": [ "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e", diff --git a/api/public/Pipfile b/api/public/Pipfile index 042bf330..b89d2f7e 100644 --- a/api/public/Pipfile +++ b/api/public/Pipfile @@ -10,6 +10,7 @@ selene = {path = "./../../shared"} flask = "*" flask-restful = "*" requests 
= "*" +SpeechRecognition = "*" [requires] python_version = "3.7" diff --git a/api/public/Pipfile.lock b/api/public/Pipfile.lock index 96641a86..f2aa0190 100644 --- a/api/public/Pipfile.lock +++ b/api/public/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "8f1ccc1ec0009a5789febd690a24105cc934edebfccbd20c73a55cffe8a1ed7a" + "sha256": "3442df8a082a5627f0484d743fb0462d2b7874f61a5e2d603efd23ed6989dd8f" }, "pipfile-spec": 6, "requires": { @@ -136,6 +136,13 @@ ], "version": "==1.12.0" }, + "speechrecognition": { + "hashes": [ + "sha256:4d8f73a0c05ec70331c3bacaa89ecc06dfa8d9aba0899276664cda06ab597e8e" + ], + "index": "pypi", + "version": "==3.8.1" + }, "urllib3": { "hashes": [ "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", diff --git a/api/public/api.py b/api/public/api.py index d70f348d..9b8af481 100644 --- a/api/public/api.py +++ b/api/public/api.py @@ -11,6 +11,7 @@ from selene.api.base_config import get_base_config from .endpoints.device_subscription import DeviceSubscriptionEndpoint from .endpoints.open_weather_map import OpenWeatherMapEndpoint from .endpoints.wolfram_alpha import WolframAlphaEndpoint +from .endpoints.google_stt import GoogleSTTEndpoint public = Flask(__name__) public.config.from_object(get_base_config()) @@ -26,3 +27,4 @@ public_api.add_resource(DeviceSettingEndpoint, '/device//setti public_api.add_resource(DeviceSubscriptionEndpoint, '/device//subscription') public_api.add_resource(WolframAlphaEndpoint, '/wa') # TODO: change this path in the API v2 public_api.add_resource(OpenWeatherMapEndpoint, '/owm/') # TODO: change this path in the API v2 +public_api.add_resource(GoogleSTTEndpoint, '/stt') # TODO: change this path in the API v2 diff --git a/api/public/endpoints/google_stt.py b/api/public/endpoints/google_stt.py new file mode 100644 index 00000000..73062675 --- /dev/null +++ b/api/public/endpoints/google_stt.py @@ -0,0 +1,39 @@ +import os +from io import BytesIO + +from selene.api import SeleneEndpoint 
from speech_recognition import AudioFile
from speech_recognition import Recognizer


class GoogleSTTEndpoint(SeleneEndpoint):
    """Endpoint that receives a FLAC audio file and returns its utterances.

    The audio in the request body is sent to the Google speech-to-text API
    and the transcripts found are returned, ordered from the highest to the
    lowest confidence.
    """

    def __init__(self):
        super().__init__()
        # Fails fast with KeyError when the API key is not configured.
        self.google_stt_key = os.environ['GOOGLE_STT_KEY']
        self.recognizer = Recognizer()

    def post(self):
        """Transcribe the posted audio and return the best transcripts.

        Reads ``lang`` (language passed to the recognizer) and ``limit``
        (maximum number of transcripts to return, converted with ``int``)
        from the request arguments, and the audio bytes from the request
        body.

        Returns:
            A list with at most ``limit`` transcripts sorted by descending
            confidence, or an empty list when the recognizer does not
            return a result dictionary.
        """
        lang = self.request.args['lang']
        limit = int(self.request.args['limit'])
        audio = self.request.data
        # We need to replicate the first 16 bytes of the audio due to a bug
        # in the speech recognition library that removes the first 16 bytes
        # from the flac file we are sending.
        with AudioFile(BytesIO(audio[:16] + audio)) as source:
            data = self.recognizer.record(source)
        response = self.recognizer.recognize_google(
            data,
            key=self.google_stt_key,
            language=lang,
            show_all=True
        )
        # With show_all=True the recognizer hands back the raw result; only
        # a dictionary carries alternatives to rank.
        if not isinstance(response, dict):
            return []
        # A result without the "alternative" key yields no transcripts
        # instead of failing inside sorted().
        alternatives = response.get('alternative') or []
        # Not every alternative carries a confidence value. Those without
        # one are ranked last, and because the sort is stable they keep the
        # order in which the API returned them.
        ranked = sorted(
            alternatives,
            key=lambda alt: alt.get('confidence', 0),
            reverse=True
        )
        # Return the n transcripts with the highest confidence. That is
        # useful when an ambiguous voice file is sent, the correct utterance
        # is not the one with the highest confidence, and the API client is
        # interested in testing the utterances found. Slicing covers both
        # the "fewer than limit" and the "more than limit" cases.
        return [alt['transcript'] for alt in ranked][:limit]