Created endpoint to communicate with the Google STT API

pull/46/head
Matheus Lima 2019-02-12 20:50:39 -03:00
parent be31c2d1ac
commit 73ad03c7b6
5 changed files with 50 additions and 22 deletions

View File

@ -102,13 +102,6 @@
}
},
"develop": {
"aniso8601": {
"hashes": [
"sha256:03c0ffeeb04edeca1ed59684cc6836dc377f58e52e315dc7be3af879909889f4",
"sha256:ac30cceff24aec920c37b8d74d7d8a5dd37b1f62a90b4f268a6234cabe147080"
],
"version": "==4.1.0"
},
"behave": {
"hashes": [
"sha256:b9662327aa53294c1351b0a9c369093ccec1d21026f050c3bd9b3e5cccf81a86",
@ -153,13 +146,6 @@
"index": "pypi",
"version": "==1.0.2"
},
"flask-restful": {
"hashes": [
"sha256:ecd620c5cc29f663627f99e04f17d1f16d095c83dc1d618426e2ad68b03092f8",
"sha256:f8240ec12349afe8df1db168ea7c336c4e5b0271a36982bff7394f93275f2ca9"
],
"version": "==0.3.7"
},
"idna": {
"hashes": [
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
@ -297,13 +283,6 @@
],
"version": "==1.7.1"
},
"pytz": {
"hashes": [
"sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
"sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
],
"version": "==2018.9"
},
"requests": {
"hashes": [
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",

View File

@ -10,6 +10,7 @@ selene = {path = "./../../shared"}
flask = "*"
flask-restful = "*"
requests = "*"
SpeechRecognition = "*"
[requires]
python_version = "3.7"

View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "8f1ccc1ec0009a5789febd690a24105cc934edebfccbd20c73a55cffe8a1ed7a"
"sha256": "3442df8a082a5627f0484d743fb0462d2b7874f61a5e2d603efd23ed6989dd8f"
},
"pipfile-spec": 6,
"requires": {
@ -136,6 +136,13 @@
],
"version": "==1.12.0"
},
"speechrecognition": {
"hashes": [
"sha256:4d8f73a0c05ec70331c3bacaa89ecc06dfa8d9aba0899276664cda06ab597e8e"
],
"index": "pypi",
"version": "==3.8.1"
},
"urllib3": {
"hashes": [
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",

View File

@ -11,6 +11,7 @@ from selene.api.base_config import get_base_config
from .endpoints.device_subscription import DeviceSubscriptionEndpoint
from .endpoints.open_weather_map import OpenWeatherMapEndpoint
from .endpoints.wolfram_alpha import WolframAlphaEndpoint
from .endpoints.google_stt import GoogleSTTEndpoint
# Create the Flask application and load its settings from the object returned by get_base_config().
public = Flask(__name__)
public.config.from_object(get_base_config())
@ -26,3 +27,4 @@ public_api.add_resource(DeviceSettingEndpoint, '/device/<string:device_id>/setti
# Route registrations: map each endpoint resource class to the URL path it serves.
public_api.add_resource(DeviceSubscriptionEndpoint, '/device/<string:device_id>/subscription')
public_api.add_resource(WolframAlphaEndpoint, '/wa') # TODO: change this path in the API v2
public_api.add_resource(OpenWeatherMapEndpoint, '/owm/<path:path>') # TODO: change this path in the API v2
public_api.add_resource(GoogleSTTEndpoint, '/stt') # TODO: change this path in the API v2

View File

@ -0,0 +1,39 @@
import os
from io import BytesIO
from selene.api import SeleneEndpoint
from speech_recognition import AudioFile
from speech_recognition import Recognizer
class GoogleSTTEndpoint(SeleneEndpoint):
    """Endpoint that receives a FLAC audio file with speech and returns utterances.

    The request body is the raw audio. The query string must carry ``lang``
    (language code passed to the recognizer) and ``limit`` (maximum number of
    transcripts to return).
    """

    def __init__(self):
        super(GoogleSTTEndpoint, self).__init__()
        # Raises KeyError when the GOOGLE_STT_KEY environment variable is unset.
        self.google_stt_key = os.environ['GOOGLE_STT_KEY']
        self.recognizer = Recognizer()

    def post(self):
        """Transcribe the posted audio and return the best transcripts.

        Returns:
            A list of transcript strings ordered from highest to lowest
            confidence, truncated to ``limit`` entries. An empty list is
            returned when the recognizer response is not a dict or carries
            no alternatives.
        """
        lang = self.request.args['lang']
        limit = int(self.request.args['limit'])
        audio = self.request.data

        # We need to replicate the first 16 bytes of the audio due to a bug in
        # the speech recognition library that removes the first 16 bytes from
        # the FLAC file we are sending.
        with AudioFile(BytesIO(audio[:16] + audio)) as source:
            data = self.recognizer.record(source)

        response = self.recognizer.recognize_google(
            data,
            key=self.google_stt_key,
            language=lang,
            show_all=True
        )
        if not isinstance(response, dict):
            return []

        # The "alternative" key may be absent; treat that as "no transcripts"
        # instead of letting sorted(None) raise a TypeError.
        alternatives = response.get('alternative') or []

        # Sort by confidence, highest first. The "confidence" field is not
        # guaranteed on every alternative, so a missing value ranks as 0.0.
        # sorted() is stable, so alternatives without a confidence keep the
        # relative order in which the service returned them.
        ranked = sorted(
            alternatives,
            key=lambda alt: alt.get('confidence', 0.0),
            reverse=True
        )
        transcripts = [alt['transcript'] for alt in ranked]

        # Return the n transcripts with the highest confidence. That is useful
        # when an ambiguous voice file is sent, the correct utterance is not
        # the one with the highest confidence, and the API client wants to
        # test the other utterances found. Slicing already copes with a list
        # shorter than ``limit``.
        return transcripts[:limit]