From 2b62f74e5fe829f55e5adffa0d0ab5dd4706b0c9 Mon Sep 17 00:00:00 2001 From: Gwendal Roulleau Date: Sat, 3 Sep 2022 12:00:32 +0200 Subject: [PATCH] [mimictts] Use http method POST instead of GET (#13239) * [mimictts] Use http method POST instead of GET Using the POST method allows a larger request size. Also, using InputStreamResponseListener avoids keeping the response in memory * [mimictts] quick fix for regression : providing length Some audio sinks need it Signed-off-by: Gwendal Roulleau --- .../internal/InputStreamAudioStream.java | 123 ++++++++++++++++++ .../voice/mimic/internal/MimicTTSService.java | 90 +++++++++---- .../voice/mimic/internal/dto/VoiceDto.java | 2 + 3 files changed, 188 insertions(+), 27 deletions(-) create mode 100644 bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java new file mode 100644 index 00000000000..27d93a0efd3 --- /dev/null +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2010-2022 Contributors to the openHAB project + * + * See the NOTICE file(s) distributed with this work for additional + * information. 
+ * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0 + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.openhab.voice.mimic.internal; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.eclipse.jdt.annotation.NonNullByDefault; +import org.eclipse.jdt.annotation.Nullable; +import org.openhab.core.audio.AudioException; +import org.openhab.core.audio.AudioFormat; +import org.openhab.core.audio.FixedLengthAudioStream; + +/** + * An AudioStream with an {@link InputStream} inside + * + * @author Gwendal Roulleau - Initial contribution + */ +@NonNullByDefault +public class InputStreamAudioStream extends FixedLengthAudioStream { + + public InputStream innerInputStream; + public AudioFormat audioFormat; + public long length; + + public InputStreamAudioStream(InputStream innerInputStream, AudioFormat audioFormat, long length) { + super(); + this.innerInputStream = innerInputStream; + this.audioFormat = audioFormat; + this.length = length; + } + + @Override + public AudioFormat getFormat() { + return audioFormat; + } + + @Override + public int read() throws IOException { + return innerInputStream.read(); + } + + @Override + public int read(byte @Nullable [] b) throws IOException { + return innerInputStream.read(b); + } + + @Override + public int read(byte @Nullable [] b, int off, int len) throws IOException { + return innerInputStream.read(b, off, len); + } + + @Override + public byte[] readAllBytes() throws IOException { + return innerInputStream.readAllBytes(); + } + + @Override + public byte[] readNBytes(int len) throws IOException { + return innerInputStream.readNBytes(len); + } + + @Override + public int readNBytes(byte @Nullable [] b, int off, int len) throws IOException { + return innerInputStream.readNBytes(b, off, len); + } + + @Override + public long skip(long n) throws 
IOException { + return innerInputStream.skip(n); + } + + @Override + public int available() throws IOException { + return innerInputStream.available(); + } + + @Override + public void close() throws IOException { + innerInputStream.close(); + } + + @Override + public synchronized void mark(int readlimit) { + innerInputStream.mark(readlimit); + } + + @Override + public synchronized void reset() throws IOException { + innerInputStream.reset(); + } + + @Override + public boolean markSupported() { + return innerInputStream.markSupported(); + } + + @Override + public long transferTo(@Nullable OutputStream out) throws IOException { + return innerInputStream.transferTo(out); + } + + @Override + public long length() { + return length; + } + + @Override + public InputStream getClonedStream() throws AudioException { + throw new AudioException("Operation not supported"); + } +} diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java index b82befda855..108b74bf032 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java @@ -13,23 +13,29 @@ package org.openhab.voice.mimic.internal; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.HashSet; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import org.eclipse.jdt.annotation.NonNullByDefault; import org.eclipse.jdt.annotation.Nullable; +import org.eclipse.jetty.client.HttpClient; +import 
org.eclipse.jetty.client.api.Response; +import org.eclipse.jetty.client.util.InputStreamResponseListener; +import org.eclipse.jetty.client.util.StringContentProvider; +import org.eclipse.jetty.http.HttpHeader; +import org.eclipse.jetty.http.HttpStatus; import org.openhab.core.audio.AudioFormat; import org.openhab.core.audio.AudioStream; -import org.openhab.core.audio.ByteArrayAudioStream; import org.openhab.core.config.core.ConfigurableService; +import org.openhab.core.io.net.http.HttpClientFactory; import org.openhab.core.io.net.http.HttpRequestBuilder; -import org.openhab.core.io.net.http.HttpUtil; -import org.openhab.core.library.types.RawType; import org.openhab.core.voice.TTSException; import org.openhab.core.voice.TTSService; import org.openhab.core.voice.Voice; @@ -38,6 +44,7 @@ import org.osgi.framework.Constants; import org.osgi.service.component.annotations.Activate; import org.osgi.service.component.annotations.Component; import org.osgi.service.component.annotations.Modified; +import org.osgi.service.component.annotations.Reference; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -57,6 +64,8 @@ import com.google.gson.JsonSyntaxException; @NonNullByDefault public class MimicTTSService implements TTSService { + private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class); + static final String SERVICE_CATEGORY = "voice"; static final String SERVICE_ID = "mimictts"; static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID; @@ -82,18 +91,16 @@ public class MimicTTSService implements TTSService { private Set availableVoices = new HashSet<>(); - /** - * Logger. 
- */ - private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class); - private final MimicConfiguration config = new MimicConfiguration(); private final Gson gson = new GsonBuilder().create(); + private final HttpClient httpClient; + @Activate - protected void activate(Map config) { + public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map config) { updateConfig(config); + this.httpClient = httpClientFactory.getCommonHttpClient(); } /** @@ -175,8 +182,9 @@ public class MimicTTSService implements TTSService { return; } for (VoiceDto voiceDto : mimicVoiceResponse) { - if (voiceDto.speakers != null && voiceDto.speakers.size() > 0) { - for (String speaker : voiceDto.speakers) { + List speakers = voiceDto.speakers; + if (speakers != null && !speakers.isEmpty()) { + for (String speaker : speakers) { availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker)); } } else { @@ -223,27 +231,55 @@ public class MimicTTSService implements TTSService { if (!AUDIO_FORMAT.isCompatible(requestedFormat)) { throw new TTSException("The passed AudioFormat is unsupported"); } - String encodedText; - try { - encodedText = URLEncoder.encode(text, StandardCharsets.UTF_8.toString()); - } catch (UnsupportedEncodingException e) { - throw new IllegalArgumentException("Cannot encode text in URL " + text); - } String ssml = ""; if (text.startsWith("")) { ssml = "&ssml=true"; } - // create the audio byte array for given text, locale, format - String urlTTS = config.url + SYNTHETIZE_URL + "?text=" + encodedText + "&voice=" - + ((MimicVoice) voice).getTechnicalName() + ssml + "&noiseScale=" + config.audioVolatility + "&noiseW=" - + config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client"; + // create the url for given locale, format + String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + ((MimicVoice) voice).getTechnicalName() + ssml + + "&noiseScale=" + config.audioVolatility + "&noiseW=" + 
config.phonemeVolatility + "&lengthScale=" + + config.speakingRate + "&audioTarget=client"; logger.debug("Querying mimic with URL {}", urlTTS); - RawType responseWav = HttpUtil.downloadData(urlTTS, "audio/wav", false, -1); - if (responseWav == null) { - throw new TTSException("Cannot get wav from mimic url " + urlTTS); + + // prepare the response as an inputstream + InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener(); + // we will use a POST method for the text + StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8); + httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener); + + // compute the estimated timeout using a "stupid" method based on text length, as the response time depends on + // the requested text. Average speaking speed is estimated at 10 characters/second. + // A safety margin multiplier (x5) is applied to tolerate a very slow mimic server, + // so the timeout in seconds is length * 5 / 10 = length / 2 + int timeout = text.length() / 2; + + // check response status and return AudioStream + Response response; + try { + response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS); + if (response.getStatus() == HttpStatus.OK_200) { + String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH); + long length; + try { + length = Long.parseLong(lengthHeader); + } catch (NumberFormatException e) { + throw new TTSException( + "Cannot get Content-Length header from mimic response. 
Are you sure to query a mimic TTS server at " + + urlTTS + " ?"); + } + return new InputStreamAudioStream(inputStreamResponseListener.getInputStream(), AUDIO_FORMAT, length); + } else { + String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code " + + response.getStatus() + " for reason " + response.getReason(); + TTSException ttsException = new TTSException(errorMessage); + response.abort(ttsException); + throw ttsException; + } + } catch (InterruptedException | TimeoutException | ExecutionException e) { + String errorMessage = "Cannot get wav from mimic url " + urlTTS; + throw new TTSException(errorMessage, e); } - return new ByteArrayAudioStream(responseWav.getBytes(), AUDIO_FORMAT); } } diff --git a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java index a9dedf206a9..0b30706ffc4 100644 --- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java +++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java @@ -16,6 +16,7 @@ import java.util.ArrayList; import java.util.List; import org.eclipse.jdt.annotation.NonNullByDefault; +import org.eclipse.jdt.annotation.Nullable; /** * Mimic Voice DTO. @@ -28,5 +29,6 @@ public class VoiceDto { public String key = "UNDEFINED"; public String language = "UNDEFINED"; public String name = "UNDEFINED"; + @Nullable public List speakers = new ArrayList<>(); }