Created endpoint to communicate with the Google STT API

2019-02-12 20:50:39 -03:00 · 2019-02-12 20:50:39 -03:00 · 73ad03c7b6
parent be31c2d1ac
commit 73ad03c7b6
5 changed files with 50 additions and 22 deletions
--- a/api/account/Pipfile.lock
+++ b/api/account/Pipfile.lock
@ -102,13 +102,6 @@
        }
    },
    "develop": {
-        "aniso8601": {
-            "hashes": [
-                "sha256:03c0ffeeb04edeca1ed59684cc6836dc377f58e52e315dc7be3af879909889f4",
-                "sha256:ac30cceff24aec920c37b8d74d7d8a5dd37b1f62a90b4f268a6234cabe147080"
-            ],
-            "version": "==4.1.0"
-        },
        "behave": {
            "hashes": [
                "sha256:b9662327aa53294c1351b0a9c369093ccec1d21026f050c3bd9b3e5cccf81a86",
@ -153,13 +146,6 @@
            "index": "pypi",
            "version": "==1.0.2"
        },
-        "flask-restful": {
-            "hashes": [
-                "sha256:ecd620c5cc29f663627f99e04f17d1f16d095c83dc1d618426e2ad68b03092f8",
-                "sha256:f8240ec12349afe8df1db168ea7c336c4e5b0271a36982bff7394f93275f2ca9"
-            ],
-            "version": "==0.3.7"
-        },
        "idna": {
            "hashes": [
                "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
@ -297,13 +283,6 @@
            ],
            "version": "==1.7.1"
        },
-        "pytz": {
-            "hashes": [
-                "sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
-                "sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
-            ],
-            "version": "==2018.9"
-        },
        "requests": {
            "hashes": [
                "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
--- a/api/public/Pipfile
+++ b/api/public/Pipfile
@ -10,6 +10,7 @@ selene = {path = "./../../shared"}
 flask = "*"
 flask-restful = "*"
 requests = "*"
+SpeechRecognition = "*"

 [requires]
 python_version = "3.7"
--- a/api/public/Pipfile.lock
+++ b/api/public/Pipfile.lock
@ -1,7 +1,7 @@
 {
    "_meta": {
        "hash": {
-            "sha256": "8f1ccc1ec0009a5789febd690a24105cc934edebfccbd20c73a55cffe8a1ed7a"
+            "sha256": "3442df8a082a5627f0484d743fb0462d2b7874f61a5e2d603efd23ed6989dd8f"
        },
        "pipfile-spec": 6,
        "requires": {
@ -136,6 +136,13 @@
            ],
            "version": "==1.12.0"
        },
+        "speechrecognition": {
+            "hashes": [
+                "sha256:4d8f73a0c05ec70331c3bacaa89ecc06dfa8d9aba0899276664cda06ab597e8e"
+            ],
+            "index": "pypi",
+            "version": "==3.8.1"
+        },
        "urllib3": {
            "hashes": [
                "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
--- a/api/public/api.py
+++ b/api/public/api.py
@ -11,6 +11,7 @@ from selene.api.base_config import get_base_config
 from .endpoints.device_subscription import DeviceSubscriptionEndpoint
 from .endpoints.open_weather_map import OpenWeatherMapEndpoint
 from .endpoints.wolfram_alpha import WolframAlphaEndpoint
+from .endpoints.google_stt import GoogleSTTEndpoint

 public = Flask(__name__)
 public.config.from_object(get_base_config())
@ -26,3 +27,4 @@ public_api.add_resource(DeviceSettingEndpoint, '/device/<string:device_id>/setti
 public_api.add_resource(DeviceSubscriptionEndpoint, '/device/<string:device_id>/subscription')
 public_api.add_resource(WolframAlphaEndpoint, '/wa')  # TODO: change this path in the API v2
 public_api.add_resource(OpenWeatherMapEndpoint, '/owm/<path:path>')     # TODO: change this path in the API v2
+public_api.add_resource(GoogleSTTEndpoint, '/stt')  # TODO: change this path in the API v2
--- a/api/public/endpoints/google_stt.py
+++ b/api/public/endpoints/google_stt.py
@ -0,0 +1,39 @@
+import os
+from io import BytesIO
+
+from selene.api import SeleneEndpoint
+from speech_recognition import AudioFile
+from speech_recognition import Recognizer
+
+
+class GoogleSTTEndpoint(SeleneEndpoint):
+    """Endpoint that receives FLAC audio containing speech and returns the transcripts found by Google STT."""
+
+    def __init__(self):
+        super().__init__()
+        # Fails fast with KeyError when the API key is missing from the environment.
+        self.google_stt_key = os.environ['GOOGLE_STT_KEY']
+        self.recognizer = Recognizer()
+
+    def post(self):
+        """Transcribe the request body and return up to ``limit`` transcripts, highest confidence first.
+
+        Reads the ``lang`` and ``limit`` query arguments; returns an empty list when nothing is recognized.
+        """
+        lang = self.request.args['lang']
+        limit = int(self.request.args['limit'])
+        audio = self.request.data
+        # The first 16 bytes are duplicated to work around a bug in the speech recognition library, which
+        # strips the first 16 bytes from the FLAC file it is given.
+        with AudioFile(BytesIO(audio[:16] + audio)) as source:
+            data = self.recognizer.record(source)
+        response = self.recognizer.recognize_google(data, key=self.google_stt_key, language=lang, show_all=True)
+        if not isinstance(response, dict):
+            return []
+        # Alternatives may lack a confidence score; they rank last and the stable sort keeps their order.
+        ranked = sorted(response.get('alternative') or [], key=lambda alt: alt.get('confidence', 0.0), reverse=True)
+        # Several transcripts are returned because, for ambiguous audio, the correct utterance is not always
+        # the one with the highest confidence and the client may want to try the others.
+        return [alt['transcript'] for alt in ranked][:limit]
+
+