Created endpoint to communicate with the Google STT API
parent
be31c2d1ac
commit
73ad03c7b6
|
@ -102,13 +102,6 @@
|
|||
}
|
||||
},
|
||||
"develop": {
|
||||
"aniso8601": {
|
||||
"hashes": [
|
||||
"sha256:03c0ffeeb04edeca1ed59684cc6836dc377f58e52e315dc7be3af879909889f4",
|
||||
"sha256:ac30cceff24aec920c37b8d74d7d8a5dd37b1f62a90b4f268a6234cabe147080"
|
||||
],
|
||||
"version": "==4.1.0"
|
||||
},
|
||||
"behave": {
|
||||
"hashes": [
|
||||
"sha256:b9662327aa53294c1351b0a9c369093ccec1d21026f050c3bd9b3e5cccf81a86",
|
||||
|
@ -153,13 +146,6 @@
|
|||
"index": "pypi",
|
||||
"version": "==1.0.2"
|
||||
},
|
||||
"flask-restful": {
|
||||
"hashes": [
|
||||
"sha256:ecd620c5cc29f663627f99e04f17d1f16d095c83dc1d618426e2ad68b03092f8",
|
||||
"sha256:f8240ec12349afe8df1db168ea7c336c4e5b0271a36982bff7394f93275f2ca9"
|
||||
],
|
||||
"version": "==0.3.7"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||
|
@ -297,13 +283,6 @@
|
|||
],
|
||||
"version": "==1.7.1"
|
||||
},
|
||||
"pytz": {
|
||||
"hashes": [
|
||||
"sha256:32b0891edff07e28efe91284ed9c31e123d84bea3fd98e1f72be2508f43ef8d9",
|
||||
"sha256:d5f05e487007e29e03409f9398d074e158d920d36eb82eaf66fb1136b0c5374c"
|
||||
],
|
||||
"version": "==2018.9"
|
||||
},
|
||||
"requests": {
|
||||
"hashes": [
|
||||
"sha256:502a824f31acdacb3a35b6690b5fbf0bc41d63a24a45c4004352b0242707598e",
|
||||
|
|
|
@ -10,6 +10,7 @@ selene = {path = "./../../shared"}
|
|||
flask = "*"
|
||||
flask-restful = "*"
|
||||
requests = "*"
|
||||
SpeechRecognition = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3.7"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "8f1ccc1ec0009a5789febd690a24105cc934edebfccbd20c73a55cffe8a1ed7a"
|
||||
"sha256": "3442df8a082a5627f0484d743fb0462d2b7874f61a5e2d603efd23ed6989dd8f"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -136,6 +136,13 @@
|
|||
],
|
||||
"version": "==1.12.0"
|
||||
},
|
||||
"speechrecognition": {
|
||||
"hashes": [
|
||||
"sha256:4d8f73a0c05ec70331c3bacaa89ecc06dfa8d9aba0899276664cda06ab597e8e"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==3.8.1"
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39",
|
||||
|
|
|
@ -11,6 +11,7 @@ from selene.api.base_config import get_base_config
|
|||
from .endpoints.device_subscription import DeviceSubscriptionEndpoint
|
||||
from .endpoints.open_weather_map import OpenWeatherMapEndpoint
|
||||
from .endpoints.wolfram_alpha import WolframAlphaEndpoint
|
||||
from .endpoints.google_stt import GoogleSTTEndpoint
|
||||
|
||||
public = Flask(__name__)
|
||||
public.config.from_object(get_base_config())
|
||||
|
@ -26,3 +27,4 @@ public_api.add_resource(DeviceSettingEndpoint, '/device/<string:device_id>/setti
|
|||
public_api.add_resource(DeviceSubscriptionEndpoint, '/device/<string:device_id>/subscription')
|
||||
public_api.add_resource(WolframAlphaEndpoint, '/wa') # TODO: change this path in the API v2
|
||||
public_api.add_resource(OpenWeatherMapEndpoint, '/owm/<path:path>') # TODO: change this path in the API v2
|
||||
public_api.add_resource(GoogleSTTEndpoint, '/stt') # TODO: change this path in the API v2
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
import os
|
||||
from io import BytesIO
|
||||
|
||||
from selene.api import SeleneEndpoint
|
||||
from speech_recognition import AudioFile
|
||||
from speech_recognition import Recognizer
|
||||
|
||||
|
||||
class GoogleSTTEndpoint(SeleneEndpoint):
    """Endpoint that receives FLAC audio and returns candidate utterances.

    The request body is the raw audio; the query string must provide
    ``lang`` (language code passed to the recognizer) and ``limit``
    (maximum number of transcripts to return).
    """

    def __init__(self):
        super(GoogleSTTEndpoint, self).__init__()
        # Raises KeyError if the key is not configured in the environment.
        self.google_stt_key = os.environ['GOOGLE_STT_KEY']
        self.recognizer = Recognizer()

    def post(self):
        """Transcribe the posted audio.

        Returns:
            A list of at most ``limit`` transcripts, ordered from the highest
            to the lowest confidence. An empty list is returned when the
            recognizer does not return a dict, when the dict has no
            alternatives, or when ``limit`` is zero or negative.

        Raises:
            ValueError: if the ``limit`` query argument is not an integer.
        """
        lang = self.request.args['lang']
        limit = int(self.request.args['limit'])
        audio = self.request.data

        # We need to replicate the first 16 bytes of the audio due to a bug
        # in the speech recognition library that removes the first 16 bytes
        # from the flac file we are sending.
        with AudioFile(BytesIO(audio[:16] + audio)) as source:
            data = self.recognizer.record(source)

        response = self.recognizer.recognize_google(
            data,
            key=self.google_stt_key,
            language=lang,
            show_all=True
        )
        if not isinstance(response, dict):
            return []

        # "alternative" may be missing or None; treat both as "no results".
        alternatives = response.get("alternative") or []

        # Sort by confidence, best first. An alternative may come without a
        # "confidence" entry (NOTE(review): the service appears to attach it
        # only to some alternatives - confirm); those are ranked last, and
        # because sorted() is stable they keep their original relative order.
        ranked = sorted(
            alternatives,
            key=lambda alt: alt.get('confidence', 0.0),
            reverse=True
        )
        transcripts = [alt['transcript'] for alt in ranked if 'transcript' in alt]

        # Return the n transcripts with the highest confidence. That is
        # useful when an ambiguous voice file is sent and the correct
        # utterance is not the one with the highest confidence, so the API
        # client can test every utterance found. Slicing already copes with
        # a limit larger than the list; a negative limit is clamped to zero
        # so that it cannot silently drop entries from the end instead.
        return transcripts[:max(limit, 0)]
|
||||
|
||||
|
Loading…
Reference in New Issue