From 73ad03c7b6cd83f608af79168f77b889fe3ac64a Mon Sep 17 00:00:00 2001
From: Matheus Lima <matheuslimaufc@gmail.com>
Date: Tue, 12 Feb 2019 20:50:39 -0300
Subject: [PATCH] Created endpoint to communicate with the Google STT API

---
 api/account/Pipfile.lock           | 21 ----------------
 api/public/Pipfile                 |  1 +
 api/public/Pipfile.lock            |  9 ++++++-
 api/public/api.py                  |  2 ++
 api/public/endpoints/google_stt.py | 39 ++++++++++++++++++++++++++++++
 5 files changed, 50 insertions(+), 22 deletions(-)
 create mode 100644 api/public/endpoints/google_stt.py

diff --git a/api/account/Pipfile.lock b/api/account/Pipfile.lock
index b7217f3c..11ca22e3 100644
--- a/api/account/Pipfile.lock
+++ b/api/account/Pipfile.lock
@@ -102,13 +102,6 @@
         }
     },
     "develop": {
-        "aniso8601": {
-            "hashes": [
-                "sha256:03c0ffeeb04edeca1ed59684cc6836dc377f58e52e315dc7be3af879909889f4",
-                "sha256:ac30cceff24aec920c37b8d74d7d8a5dd37b1f62a90b4f268a6234cabe147080"
-            ],
-            "version": "==4.1.0"
-        },
         "behave": {
             "hashes": [
                 "sha256:b9662327aa53294c1351b0a9c369093ccec1d21026f050c3bd9b3e5cccf81a86",
@@ -153,13 +146,6 @@
             "index": "pypi",
             "version": "==1.0.2"
         },
-        "flask-restful": {
-            "hashes": [
-                "sha256:ecd620c5cc29f663627f99e04f17d1f16d095c83dc1d618426e2ad68b03092f8",
-                "sha256:f8240ec12349afe8df1db168ea7c336c4e5b0271a36982bff7394f93275f2ca9"
-            ],
-            "version": "==0.3.7"
-        },
         "idna": {
             "hashes": [
                 "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
@@ -297,13 +283,6 @@
             ],
             "version": "==1.7.1"
         },
-        "pytz": {
-            "hashes": [
-                "sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
-                "sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
-            ],
-            "version": "==2018.9"
-        },
         "requests": {
             "hashes": [
                 "sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
diff --git a/api/public/Pipfile b/api/public/Pipfile
index 042bf330..b89d2f7e 100644
--- a/api/public/Pipfile
+++ b/api/public/Pipfile
@@ -10,6 +10,7 @@ selene = {path = "./../../shared"}
 flask = "*"
 flask-restful = "*"
 requests = "*"
+SpeechRecognition = "*"
 
 [requires]
 python_version = "3.7"
diff --git a/api/public/Pipfile.lock b/api/public/Pipfile.lock
index 96641a86..f2aa0190 100644
--- a/api/public/Pipfile.lock
+++ b/api/public/Pipfile.lock
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "8f1ccc1ec0009a5789febd690a24105cc934edebfccbd20c73a55cffe8a1ed7a"
+            "sha256": "3442df8a082a5627f0484d743fb0462d2b7874f61a5e2d603efd23ed6989dd8f"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -136,6 +136,13 @@
             ],
             "version": "==1.12.0"
         },
+        "speechrecognition": {
+            "hashes": [
+                "sha256:4d8f73a0c05ec70331c3bacaa89ecc06dfa8d9aba0899276664cda06ab597e8e"
+            ],
+            "index": "pypi",
+            "version": "==3.8.1"
+        },
         "urllib3": {
             "hashes": [
                 "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
diff --git a/api/public/api.py b/api/public/api.py
index d70f348d..9b8af481 100644
--- a/api/public/api.py
+++ b/api/public/api.py
@@ -11,6 +11,7 @@ from selene.api.base_config import get_base_config
 from .endpoints.device_subscription import DeviceSubscriptionEndpoint
 from .endpoints.open_weather_map import OpenWeatherMapEndpoint
 from .endpoints.wolfram_alpha import WolframAlphaEndpoint
+from .endpoints.google_stt import GoogleSTTEndpoint
 
 public = Flask(__name__)
 public.config.from_object(get_base_config())
@@ -26,3 +27,4 @@ public_api.add_resource(DeviceSettingEndpoint, '/device/<string:device_id>/setti
 public_api.add_resource(DeviceSubscriptionEndpoint, '/device/<string:device_id>/subscription')
 public_api.add_resource(WolframAlphaEndpoint, '/wa')  # TODO: change this path in the API v2
 public_api.add_resource(OpenWeatherMapEndpoint, '/owm/<path:path>')     # TODO: change this path in the API v2
+public_api.add_resource(GoogleSTTEndpoint, '/stt')  # TODO: change this path in the API v2
diff --git a/api/public/endpoints/google_stt.py b/api/public/endpoints/google_stt.py
new file mode 100644
index 00000000..73062675
--- /dev/null
+++ b/api/public/endpoints/google_stt.py
@@ -0,0 +1,39 @@
+import os
+from io import BytesIO
+
+from selene.api import SeleneEndpoint
+from speech_recognition import AudioFile
+from speech_recognition import Recognizer
+
+
+class GoogleSTTEndpoint(SeleneEndpoint):
+    """Endpoint that receives FLAC voice audio and returns the recognized utterances.
+
+    Query arguments: ``lang`` (language passed to the recognizer) and ``limit`` (maximum number of transcripts
+    to return). The request body is used as the raw audio bytes.
+    """
+    def __init__(self):
+        super(GoogleSTTEndpoint, self).__init__()
+        self.google_stt_key = os.environ['GOOGLE_STT_KEY']
+        self.recognizer = Recognizer()
+
+    def post(self):
+        """Transcribe the posted audio and return at most ``limit`` transcripts, highest confidence first."""
+        lang = self.request.args['lang']
+        limit = max(int(self.request.args['limit']), 0)  # a negative limit would slice from the wrong end
+        audio = self.request.data
+        # We need to replicate the first 16 bytes of the audio due to a bug in the speech recognition library
+        # that removes the first 16 bytes from the flac file we are sending
+        with AudioFile(BytesIO(audio[:16] + audio)) as source:
+            data = self.recognizer.record(source)
+        response = self.recognizer.recognize_google(data, key=self.google_stt_key, language=lang, show_all=True)
+        # With show_all=True a dict is expected on success; anything else (e.g. an empty list) means no result.
+        if not isinstance(response, dict):
+            return []
+        # "alternative" may be missing, and "confidence" is not guaranteed on every entry (usually only the best
+        # hypothesis has it), so default both. sorted() is stable, so unscored entries keep their relative order.
+        alternatives = response.get('alternative') or []
+        ranked = sorted(alternatives, key=lambda alt: alt.get('confidence', 0.0), reverse=True)
+        # Several transcripts are returned so a client can try the others when the audio is ambiguous and the
+        # top-confidence utterance is not the correct one. Slicing already copes with limit > len(ranked).
+        return [alt['transcript'] for alt in ranked[:limit]]