[mimictts] Use http method POST instead of GET (#13239)

* [mimictts] Use http method POST instead of GET

Using POST method allows longer request size.
Also, using InputStreamResponseListener avoids keeping the response in memory.

* [mimictts] quick fix for regression : providing length

Some audio sinks need it.

Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
pull/13339/head
Gwendal Roulleau 2022-09-03 12:00:32 +02:00 committed by GitHub
parent b27364679e
commit 2b62f74e5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 188 additions and 27 deletions

View File

@ -0,0 +1,123 @@
/**
* Copyright (c) 2010-2022 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.mimic.internal;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.FixedLengthAudioStream;
/**
 * A {@link FixedLengthAudioStream} backed by an arbitrary {@link InputStream}.
 * <p>
 * All stream operations are forwarded unchanged to the wrapped stream; this class only adds the
 * {@link AudioFormat} and the length supplied at construction time. Cloning is not supported.
 *
 * @author Gwendal Roulleau - Initial contribution
 */
@NonNullByDefault
public class InputStreamAudioStream extends FixedLengthAudioStream {

    /** The wrapped stream that every I/O call is delegated to. */
    public InputStream innerInputStream;

    /** The format returned by {@link #getFormat()}. */
    public AudioFormat audioFormat;

    /** The value returned by {@link #length()} (presumably the stream size in bytes; confirm with caller). */
    public long length;

    /**
     * Wraps the given stream.
     *
     * @param innerInputStream the stream to delegate to
     * @param audioFormat the format reported by {@link #getFormat()}
     * @param length the length reported by {@link #length()}
     */
    public InputStreamAudioStream(InputStream innerInputStream, AudioFormat audioFormat, long length) {
        this.length = length;
        this.audioFormat = audioFormat;
        this.innerInputStream = innerInputStream;
    }

    // ---- audio stream metadata ----

    /**
     * @return the audio format given at construction time
     */
    @Override
    public AudioFormat getFormat() {
        return this.audioFormat;
    }

    /**
     * @return the length given at construction time
     */
    @Override
    public long length() {
        return this.length;
    }

    /**
     * Always fails: the wrapped stream cannot be duplicated.
     *
     * @throws AudioException on every call
     */
    @Override
    public InputStream getClonedStream() throws AudioException {
        throw new AudioException("Operation not supported");
    }

    // ---- reading, delegated to the wrapped stream ----

    @Override
    public int read() throws IOException {
        return this.innerInputStream.read();
    }

    @Override
    public int read(byte @Nullable [] b) throws IOException {
        return this.innerInputStream.read(b);
    }

    @Override
    public int read(byte @Nullable [] b, int off, int len) throws IOException {
        return this.innerInputStream.read(b, off, len);
    }

    @Override
    public byte[] readAllBytes() throws IOException {
        return this.innerInputStream.readAllBytes();
    }

    @Override
    public byte[] readNBytes(int len) throws IOException {
        return this.innerInputStream.readNBytes(len);
    }

    @Override
    public int readNBytes(byte @Nullable [] b, int off, int len) throws IOException {
        return this.innerInputStream.readNBytes(b, off, len);
    }

    @Override
    public long transferTo(@Nullable OutputStream out) throws IOException {
        return this.innerInputStream.transferTo(out);
    }

    // ---- positioning and lifecycle, delegated to the wrapped stream ----

    @Override
    public long skip(long n) throws IOException {
        return this.innerInputStream.skip(n);
    }

    @Override
    public int available() throws IOException {
        return this.innerInputStream.available();
    }

    @Override
    public boolean markSupported() {
        return this.innerInputStream.markSupported();
    }

    @Override
    public synchronized void mark(int readlimit) {
        this.innerInputStream.mark(readlimit);
    }

    @Override
    public synchronized void reset() throws IOException {
        this.innerInputStream.reset();
    }

    @Override
    public void close() throws IOException {
        this.innerInputStream.close();
    }
}

View File

@ -13,23 +13,29 @@
package org.openhab.voice.mimic.internal;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.api.Response;
import org.eclipse.jetty.client.util.InputStreamResponseListener;
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.ByteArrayAudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.io.net.http.HttpRequestBuilder;
import org.openhab.core.io.net.http.HttpUtil;
import org.openhab.core.library.types.RawType;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
@ -38,6 +44,7 @@ import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -57,6 +64,8 @@ import com.google.gson.JsonSyntaxException;
@NonNullByDefault
public class MimicTTSService implements TTSService {
private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
static final String SERVICE_CATEGORY = "voice";
static final String SERVICE_ID = "mimictts";
static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
@ -82,18 +91,16 @@ public class MimicTTSService implements TTSService {
private Set<Voice> availableVoices = new HashSet<>();
/**
* Logger.
*/
private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);
private final MimicConfiguration config = new MimicConfiguration();
private final Gson gson = new GsonBuilder().create();
private final HttpClient httpClient;
@Activate
protected void activate(Map<String, Object> config) {
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
updateConfig(config);
this.httpClient = httpClientFactory.getCommonHttpClient();
}
/**
@ -175,8 +182,9 @@ public class MimicTTSService implements TTSService {
return;
}
for (VoiceDto voiceDto : mimicVoiceResponse) {
if (voiceDto.speakers != null && voiceDto.speakers.size() > 0) {
for (String speaker : voiceDto.speakers) {
List<String> speakers = voiceDto.speakers;
if (speakers != null && !speakers.isEmpty()) {
for (String speaker : speakers) {
availableVoices.add(new MimicVoice(voiceDto.key, voiceDto.language, voiceDto.name, speaker));
}
} else {
@ -223,27 +231,55 @@ public class MimicTTSService implements TTSService {
if (!AUDIO_FORMAT.isCompatible(requestedFormat)) {
throw new TTSException("The passed AudioFormat is unsupported");
}
String encodedText;
try {
encodedText = URLEncoder.encode(text, StandardCharsets.UTF_8.toString());
} catch (UnsupportedEncodingException e) {
throw new IllegalArgumentException("Cannot encode text in URL " + text);
}
String ssml = "";
if (text.startsWith("<speak>")) {
ssml = "&ssml=true";
}
// create the audio byte array for given text, locale, format
String urlTTS = config.url + SYNTHETIZE_URL + "?text=" + encodedText + "&voice="
+ ((MimicVoice) voice).getTechnicalName() + ssml + "&noiseScale=" + config.audioVolatility + "&noiseW="
+ config.phonemeVolatility + "&lengthScale=" + config.speakingRate + "&audioTarget=client";
// create the url for given locale, format
String urlTTS = config.url + SYNTHETIZE_URL + "?voice=" + ((MimicVoice) voice).getTechnicalName() + ssml
+ "&noiseScale=" + config.audioVolatility + "&noiseW=" + config.phonemeVolatility + "&lengthScale="
+ config.speakingRate + "&audioTarget=client";
logger.debug("Querying mimic with URL {}", urlTTS);
RawType responseWav = HttpUtil.downloadData(urlTTS, "audio/wav", false, -1);
if (responseWav == null) {
throw new TTSException("Cannot get wav from mimic url " + urlTTS);
// prepare the response as an inputstream
InputStreamResponseListener inputStreamResponseListener = new InputStreamResponseListener();
// we will use a POST method for the text
StringContentProvider textContentProvider = new StringContentProvider(text, StandardCharsets.UTF_8);
httpClient.POST(urlTTS).content(textContentProvider).accept("audio/wav").send(inputStreamResponseListener);
// compute the estimated timeout using a "stupid" method based on text length, as the response time depends on
// the requested text. Average speaker speed is estimated at 10 characters per second.
// A safety margin multiplier (x5) is applied to accept very slow mimic servers,
// so the timeout in seconds is text length * 5 / 10 = text length / 2
int timeout = text.length() / 2;
// check response status and return AudioStream
Response response;
try {
response = inputStreamResponseListener.get(timeout, TimeUnit.SECONDS);
if (response.getStatus() == HttpStatus.OK_200) {
String lengthHeader = response.getHeaders().get(HttpHeader.CONTENT_LENGTH);
long length;
try {
length = Long.parseLong(lengthHeader);
} catch (NumberFormatException e) {
throw new TTSException(
"Cannot get Content-Length header from mimic response. Are you sure to query a mimic TTS server at "
+ urlTTS + " ?");
}
return new InputStreamAudioStream(inputStreamResponseListener.getInputStream(), AUDIO_FORMAT, length);
} else {
String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
+ response.getStatus() + " for reason " + response.getReason();
TTSException ttsException = new TTSException(errorMessage);
response.abort(ttsException);
throw ttsException;
}
} catch (InterruptedException | TimeoutException | ExecutionException e) {
String errorMessage = "Cannot get wav from mimic url " + urlTTS;
throw new TTSException(errorMessage, e);
}
return new ByteArrayAudioStream(responseWav.getBytes(), AUDIO_FORMAT);
}
}

View File

@ -16,6 +16,7 @@ import java.util.ArrayList;
import java.util.List;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
/**
* Mimic Voice DTO.
@ -28,5 +29,6 @@ public class VoiceDto {
public String key = "UNDEFINED";
public String language = "UNDEFINED";
public String name = "UNDEFINED";
@Nullable
public List<String> speakers = new ArrayList<>();
}