[mimictts] Use http method POST instead of GET (#13239)

* [mimictts] Use HTTP method POST instead of GET. Using the POST method allows a longer request size. Also, using InputStreamResponseListener avoids keeping the response in memory. * [mimictts] Quick fix for regression: provide the length, as some audio sinks need it. Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
2022-09-03 12:00:32 +02:00 · 2022-09-03 12:00:32 +02:00 · 2b62f74e5f
parent b27364679e
commit 2b62f74e5f
3 changed files with 188 additions and 27 deletions
--- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java
+++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/InputStreamAudioStream.java
@ -0,0 +1,123 @@
+/**
+ * Copyright (c) 2010-2022 Contributors to the openHAB project
+ *
+ * See the NOTICE file(s) distributed with this work for additional
+ * information.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ */
+package org.openhab.voice.mimic.internal;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.eclipse.jdt.annotation.Nullable;
+import org.openhab.core.audio.AudioException;
+import org.openhab.core.audio.AudioFormat;
+import org.openhab.core.audio.FixedLengthAudioStream;
+
+/**
+ * An AudioStream with an {@link InputStream} inside.
+ * <p>
+ * Every {@link InputStream} operation (read, skip, available, mark/reset, transferTo, close) is forwarded unchanged
+ * to the wrapped stream. The audio format and the length are not derived from the data: they are the values handed
+ * to the constructor and are returned as-is by {@link #getFormat()} and {@link #length()}.
+ * <p>
+ * {@link #getClonedStream()} is not supported and always throws an {@link AudioException}.
+ *
+ * @author Gwendal Roulleau - Initial contribution
+ */
+@NonNullByDefault
+public class InputStreamAudioStream extends FixedLengthAudioStream {
+
+    // NOTE(review): the three fields below are public and non-final; confirm whether any caller relies on that.
+    public InputStream innerInputStream; // stream that every read/skip/mark/reset/close call is forwarded to
+    public AudioFormat audioFormat; // value returned by getFormat()
+    public long length; // value returned by length(); supplied by the caller, never recomputed here
+
+    public InputStreamAudioStream(InputStream innerInputStream, AudioFormat audioFormat, long length) {
+        super();
+        this.innerInputStream = innerInputStream;
+        this.audioFormat = audioFormat;
+        this.length = length;
+    }
+
+    @Override
+    public AudioFormat getFormat() {
+        return audioFormat;
+    }
+
+    @Override
+    public int read() throws IOException {
+        return innerInputStream.read();
+    }
+
+    @Override
+    public int read(byte @Nullable [] b) throws IOException {
+        return innerInputStream.read(b);
+    }
+
+    @Override
+    public int read(byte @Nullable [] b, int off, int len) throws IOException {
+        return innerInputStream.read(b, off, len);
+    }
+
+    @Override
+    public byte[] readAllBytes() throws IOException {
+        return innerInputStream.readAllBytes();
+    }
+
+    @Override
+    public byte[] readNBytes(int len) throws IOException {
+        return innerInputStream.readNBytes(len);
+    }
+
+    @Override
+    public int readNBytes(byte @Nullable [] b, int off, int len) throws IOException {
+        return innerInputStream.readNBytes(b, off, len);
+    }
+
+    @Override
+    public long skip(long n) throws IOException {
+        return innerInputStream.skip(n);
+    }
+
+    @Override
+    public int available() throws IOException {
+        return innerInputStream.available();
+    }
+
+    @Override
+    public void close() throws IOException {
+        innerInputStream.close(); // closing this audio stream closes the wrapped stream
+    }
+
+    @Override
+    public synchronized void mark(int readlimit) {
+        innerInputStream.mark(readlimit);
+    }
+
+    @Override
+    public synchronized void reset() throws IOException {
+        innerInputStream.reset();
+    }
+
+    @Override
+    public boolean markSupported() {
+        return innerInputStream.markSupported(); // mark/reset support is whatever the wrapped stream reports
+    }
+
+    @Override
+    public long transferTo(@Nullable OutputStream out) throws IOException {
+        return innerInputStream.transferTo(out);
+    }
+
+    @Override
+    public long length() {
+        return length; // the constructor argument, returned without checking it against the actual stream content
+    }
+
+    @Override
+    public InputStream getClonedStream() throws AudioException {
+        throw new AudioException("Operation not supported"); // unconditional: no clone is ever produced
+    }
+}
--- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java
+++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/MimicTTSService.java
@ -13,23 +13,29 @@
 package org.openhab.voice.mimic.internal;

 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
 import java.nio.charset.StandardCharsets;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;

 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
+import org.eclipse.jetty.client.HttpClient;
+import org.eclipse.jetty.client.api.Response;
+import org.eclipse.jetty.client.util.InputStreamResponseListener;
+import org.eclipse.jetty.client.util.StringContentProvider;
+import org.eclipse.jetty.http.HttpHeader;
+import org.eclipse.jetty.http.HttpStatus;
 import org.openhab.core.audio.AudioFormat;
 import org.openhab.core.audio.AudioStream;
-import org.openhab.core.audio.ByteArrayAudioStream;
 import org.openhab.core.config.core.ConfigurableService;
+import org.openhab.core.io.net.http.HttpClientFactory;
 import org.openhab.core.io.net.http.HttpRequestBuilder;
-import org.openhab.core.io.net.http.HttpUtil;
-import org.openhab.core.library.types.RawType;
 import org.openhab.core.voice.TTSException;
 import org.openhab.core.voice.TTSService;
 import org.openhab.core.voice.Voice;
@ -38,6 +44,7 @@ import org.osgi.framework.Constants;
 import org.osgi.service.component.annotations.Activate;
 import org.osgi.service.component.annotations.Component;
 import org.osgi.service.component.annotations.Modified;
+import org.osgi.service.component.annotations.Reference;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@ -57,6 +64,8 @@ import com.google.gson.JsonSyntaxException;
@NonNullByDefault
 public class MimicTTSService implements TTSService {

+    private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
+
    static final String SERVICE_CATEGORY = "voice";
    static final String SERVICE_ID = "mimictts";
    static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
@ -82,18 +91,16 @@ public class MimicTTSService implements TTSService {

    private Set<Voice> availableVoices = new HashSet<>();

-    /**
-     * Logger.
-     */
-    private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
-
    private final MimicConfiguration config = new MimicConfiguration();

    private final Gson gson = new GsonBuilder().create();

+    private final HttpClient httpClient;
+
    @Activate
-    protected void activate(Map<String, Object> config) {
+    public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
        updateConfig(config);
+        this.httpClient = httpClientFactory.getCommonHttpClient();
    }

    /**
@ -175,8 +182,9 @@ public class MimicTTSService implements TTSService {
                return;
            }
            for (VoiceDto voiceDto : mimicVoiceResponse) {
-                if (voiceDto.speakers != null && voiceDto.speakers.size() > 0) {
-                    for (String speaker : voiceDto.speakers) {
+                List<String> speakers = voiceDto.speakers;
+                if (speakers != null && !speakers.isEmpty()) {
+                    for (String speaker : speakers) {
                        availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
                    }
                } else {
@ -223,27 +231,55 @@ public class MimicTTSService implements TTSService {
        if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
            throw new TTSException("The passed AudioFormat is unsupported");
        }
-        String encodedText;
-        try {
-            encodedText = URLEncoder.encode(text, StandardCharsets.UTF_8.toString());
-        } catch (UnsupportedEncodingException e) {
-            throw new IllegalArgumentException("Cannot encode text in URL " + text);
-        }

        String ssml = "";
        if (text.startsWith("<speak>")) {
            ssml = "&ssml=true";
        }

-        // create the audio byte array for given text, locale, format
-        String urlTTS = config.url + SYNTHETIZE_URL + "?text=" + encodedText + "&voice="
-                + ((MimicVoice) voice).getTechnicalName() + ssml + "&noiseScale=" + config.audioVolatility + "&noiseW="
-                + config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client";
+        // create the url for given locale, format
+        String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + ((MimicVoice) voice).getTechnicalName() + ssml
+                + "&noiseScale=" + config.audioVolatility + "&noiseW=" + config.phonemeVolatility + "&lengthScale="
+                + config.speakingRate + "&audioTarget=client";
        logger.debug("Querying mimic with URL {}", urlTTS);
-        RawType responseWav = HttpUtil.downloadData(urlTTS, "audio/wav", false, -1);
-        if (responseWav == null) {
-            throw new TTSException("Cannot get wav from mimic url " + urlTTS);
+
+        // prepare the response as an inputstream
+        InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener();
+        // we will use a POST method for the text
+        StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8);
+        httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener);
+
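+        // Estimate the timeout with a deliberately crude heuristic based on text length, as the response time depends
+        // on the requested text. Average speaking speed is estimated at 10 characters per second.
+        // A safety margin multiplier (x5) is applied to tolerate a very slow mimic server,
+        // so the timeout in seconds is text.length() / 10 * 5 = text.length() / 2.
+        // NOTE(review): integer division makes this 0 for a one-character text - confirm a zero timeout is acceptable.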
+        int timeout = text.length() / 2;
+
+        // check response status and return AudioStream
+        Response response;
+        try {
+            response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS);
+            if (response.getStatus() == HttpStatus.OK_200) {
+                String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH);
+                long length;
+                try {
+                    length = Long.parseLong(lengthHeader);
+                } catch (NumberFormatException e) {
+                    throw new TTSException(
+                            "Cannot get Content-Length header from mimic response. Are you sure to query a mimic TTS server at "
+                                    + urlTTS + " ?");
+                }
+                return new InputStreamAudioStream(inputStreamResponseListener.getInputStream(), AUDIO_FORMAT, length);
+            } else {
+                String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
+                        + response.getStatus() + " for reason " + response.getReason();
+                TTSException ttsException = new TTSException(errorMessage);
+                response.abort(ttsException);
+                throw ttsException;
+            }
+        } catch (InterruptedException | TimeoutException | ExecutionException e) {
+            String errorMessage = "Cannot get wav from mimic url " + urlTTS;
+            throw new TTSException(errorMessage, e);
        }
-        return new ByteArrayAudioStream(responseWav.getBytes(), AUDIO_FORMAT);
    }
 }
--- a/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java
+++ b/bundles/org.openhab.voice.mimictts/src/main/java/org/openhab/voice/mimic/internal/dto/VoiceDto.java
@ -16,6 +16,7 @@ import java.util.ArrayList;
 import java.util.List;

 import org.eclipse.jdt.annotation.NonNullByDefault;
+import org.eclipse.jdt.annotation.Nullable;

 /**
 * Mimic Voice DTO.
@ -28,5 +29,6 @@ public class VoiceDto {
    public String key = "UNDEFINED";
    public String language = "UNDEFINED";
    public String name = "UNDEFINED";
+    @Nullable
    public List<String> speakers = new ArrayList<>();
 }