[openaitts] OpenAI Text-to-Speech initial contribution (#17733)

Also-by: Wouter Born <github@maindrain.net>
Signed-off-by: Artur-Fedjukevits <fedjukevitsh@gmail.com>
pull/17981/head
Artur-Fedjukevits 2024-12-24 18:19:25 +01:00 committed by GitHub
parent 50e3ca62c0
commit 7c6e658f56
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 408 additions and 0 deletions

View File

@ -465,6 +465,7 @@
/bundles/org.openhab.voice.mactts/ @kaikreuzer
/bundles/org.openhab.voice.marytts/ @kaikreuzer
/bundles/org.openhab.voice.mimictts/ @dalgwen
/bundles/org.openhab.voice.openaitts/ @Artur-Fedjukevits
/bundles/org.openhab.voice.picotts/ @FlorianSW
/bundles/org.openhab.voice.pipertts/ @GiviMAD
/bundles/org.openhab.voice.pollytts/ @openhab/add-ons-maintainers

View File

@ -2301,6 +2301,11 @@
<artifactId>org.openhab.voice.mimictts</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.voice.openaitts</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.voice.picotts</artifactId>

View File

@ -0,0 +1,14 @@
This content is produced and maintained by the openHAB project.
* Project home: https://www.openhab.org
== Declared Project Licenses
This program and the accompanying materials are made available under the terms
of the Eclipse Public License 2.0 which is available at
https://www.eclipse.org/legal/epl-2.0/.
== Source Code
https://github.com/openhab/openhab-addons

View File

@ -0,0 +1,23 @@
# OpenAI Text-to-Speech
The OpenAI TTS (Text-to-Speech) add-on for openHAB allows you to integrate OpenAI's Text-to-Speech capabilities into your openHAB system.
The advantage of this service over others is that one selected voice can speak different languages.
This is useful, for example, in conjunction with the ChatGPT binding, which can help with learning foreign languages.
Pricing for this service is listed at <https://openai.com/api/pricing/>.
## Configuration
To configure the OpenAI TTS, go to **Settings / Other Services - OpenAI Text-to-Speech** and set:
* **apiKey** - The API key to be used for the requests.
* **apiUrl** - The server API where to reach the AI TTS service.
* **model** - The ID of the model to use for TTS.
* **speed** - The speed of the generated audio, from 0.25 to 4.0 (default 1.0).
### Default Text-to-Speech and Voice Configuration
You can set up your preferred default Text-to-Speech and default voice in the UI:
* Go to **Settings**.
* Edit **System Services - Voice**.
* Set **OpenAI TTS Service** as **Default Text-to-Speech**.
* Choose your preferred **Default Voice** for your setup.

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.openhab.addons.bundles</groupId>
<artifactId>org.openhab.addons.reactor.bundles</artifactId>
<version>5.0.0-SNAPSHOT</version>
</parent>
<artifactId>org.openhab.voice.openaitts</artifactId>
<name>openHAB Add-ons :: Bundles :: Voice :: OpenAI Text-to-Speech</name>
</project>

View File

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<features name="org.openhab.voice.openaitts-${project.version}" xmlns="http://karaf.apache.org/xmlns/features/v1.4.0">
<repository>mvn:org.openhab.core.features.karaf/org.openhab.core.features.karaf.openhab-core/${ohc.version}/xml/features</repository>
<feature name="openhab-voice-openaitts" description="OpenAI Text-to-Speech" version="${project.version}">
<feature>openhab-runtime-base</feature>
<bundle start-level="80">mvn:org.openhab.addons.bundles/org.openhab.voice.openaitts/${project.version}</bundle>
</feature>
</features>

View File

@ -0,0 +1,27 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;
import org.eclipse.jdt.annotation.NonNullByDefault;
/**
 * Holds the user-supplied settings of the OpenAI TTS service.
 * <p>
 * Instances are populated from the service configuration map; the initial values below
 * apply to any setting that is not configured.
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@NonNullByDefault
public class OpenAITTSConfiguration {

    /** API key sent as the bearer token of each request; empty until configured. */
    public String apiKey = "";

    /** URL of the speech endpoint that synthesis requests are posted to. */
    public String apiUrl = "https://api.openai.com/v1/audio/speech";

    /** ID of the model used for synthesis. */
    public String model = "tts-1";

    /** Playback speed of the generated audio, kept in its textual form. */
    public String speed = "1";
}

View File

@ -0,0 +1,25 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;
import org.eclipse.jdt.annotation.NonNullByDefault;
/**
 * Identifiers shared by the classes of the OpenAI TTS add-on.
 * <p>
 * This class only carries constants and is not meant to be instantiated.
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@NonNullByDefault
public final class OpenAITTSConstants {

    /** Identifier of the TTS service. */
    public static final String TTS_SERVICE_ID = "openaitts";

    /** Persistent identifier (PID) under which the service configuration is stored. */
    public static final String TTS_SERVICE_PID = "org.openhab.voice.openaitts";

    private OpenAITTSConstants() {
        // constants holder, no instances
    }
}

View File

@ -0,0 +1,148 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;
import static org.openhab.voice.openaitts.internal.OpenAITTSConstants.*;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.jetty.client.HttpClient;
import org.eclipse.jetty.client.api.ContentResponse;
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpMethod;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.ByteArrayAudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.config.core.Configuration;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.voice.AbstractCachedTTSService;
import org.openhab.core.voice.TTSCache;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
import org.osgi.framework.Constants;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Modified;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
/**
 * TTS service that turns text into speech by posting a JSON request to the configured
 * OpenAI speech endpoint and returning the response body as an audio stream.
 * <p>
 * API documentation: https://platform.openai.com/docs/guides/text-to-speech
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@Component(configurationPid = TTS_SERVICE_PID, property = Constants.SERVICE_PID + "="
        + TTS_SERVICE_PID, service = TTSService.class)
@ConfigurableService(category = "voice", label = "OpenAI TTS Service", description_uri = "voice:" + TTS_SERVICE_ID)
@NonNullByDefault
public class OpenAITTSService extends AbstractCachedTTSService {

    private static final int REQUEST_TIMEOUT_MS = 10_000;

    /** Voices offered by this service; unmodifiable because the same set is handed to every caller. */
    private static final Set<Voice> VOICES = Stream.of("nova", "alloy", "echo", "fable", "onyx", "shimmer")
            .map(OpenAITTSVoice::new).collect(Collectors.toUnmodifiableSet());

    /** The single audio format this service reports; built once instead of on every call. */
    private static final Set<AudioFormat> SUPPORTED_FORMATS = Set
            .of(new AudioFormat(AudioFormat.CONTAINER_NONE, AudioFormat.CODEC_MP3, null, 16, 64000, 44100L));

    private final Logger logger = LoggerFactory.getLogger(OpenAITTSService.class);
    private final HttpClient httpClient;
    private final Gson gson = new Gson();

    private OpenAITTSConfiguration config = new OpenAITTSConfiguration();

    /**
     * Creates the service with its injected dependencies.
     *
     * @param httpClientFactory factory providing the shared HTTP client used for the API requests
     * @param ttsCache cache handed to the caching base class
     * @param config the component configuration; it is not read here but applied in
     *            {@link #activate(Map)}
     */
    @Activate
    public OpenAITTSService(@Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
            Map<String, Object> config) {
        super(ttsCache);
        this.httpClient = httpClientFactory.getCommonHttpClient();
    }

    /**
     * Applies the initial configuration.
     *
     * @param config the component configuration
     */
    @Activate
    protected void activate(Map<String, Object> config) {
        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
    }

    /**
     * Replaces the current configuration with the updated one.
     *
     * @param config the updated component configuration
     */
    @Modified
    protected void modified(Map<String, Object> config) {
        this.config = new Configuration(config).as(OpenAITTSConfiguration.class);
    }

    @Override
    public Set<AudioFormat> getSupportedFormats() {
        return SUPPORTED_FORMATS;
    }

    @Override
    public String getId() {
        return TTS_SERVICE_ID;
    }

    @Override
    public String getLabel(@Nullable Locale locale) {
        return "OpenAI TTS Service";
    }

    @Override
    public Set<Voice> getAvailableVoices() {
        return VOICES;
    }

    /**
     * Synthesizes the given text to audio data using the OpenAI API.
     *
     * @param text The text to synthesize
     * @param voice The voice to use
     * @param requestedFormat The requested audio format
     * @return The synthesized audio data
     * @throws TTSException If the request fails, times out, is interrupted or does not answer with HTTP 200
     */
    @Override
    public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
        // Use one snapshot so that a configuration update during this call cannot mix old and new settings.
        OpenAITTSConfiguration currentConfig = this.config;
        String queryJson = gson.toJson(buildRequest(currentConfig, text, voice));

        try {
            ContentResponse response = httpClient.newRequest(currentConfig.apiUrl).method(HttpMethod.POST)
                    .timeout(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
                    .header("Authorization", "Bearer " + currentConfig.apiKey)
                    .header("Content-Type", "application/json").content(new StringContentProvider(queryJson))
                    .send();

            if (response.getStatus() == HttpStatus.OK_200) {
                return new ByteArrayAudioStream(response.getContent(), requestedFormat);
            }
            logger.error("Request resulted in HTTP {} with message: {}", response.getStatus(),
                    response.getReason());
            throw new TTSException("Failed to generate audio data");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so that callers further up can still observe the interruption.
            Thread.currentThread().interrupt();
            logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
            throw new TTSException("Failed to generate audio data", e);
        } catch (TimeoutException | ExecutionException e) {
            logger.error("Request to OpenAI failed: {}", e.getMessage(), e);
            throw new TTSException("Failed to generate audio data", e);
        }
    }

    /**
     * Builds the JSON request body from the configuration, the text and the voice.
     */
    private static JsonObject buildRequest(OpenAITTSConfiguration configuration, String text, Voice voice) {
        JsonObject content = new JsonObject();
        content.addProperty("model", configuration.model);
        content.addProperty("input", text);
        // Locale.ROOT keeps the lower-casing independent of the system's default locale.
        content.addProperty("voice", voice.getLabel().toLowerCase(Locale.ROOT));
        addSpeed(content, configuration.speed);
        return content;
    }

    /**
     * Adds the speed as a JSON number when the configured text is a finite number; any other
     * value is forwarded unchanged as a string.
     */
    private static void addSpeed(JsonObject content, String speed) {
        try {
            double numericSpeed = Double.parseDouble(speed);
            if (Double.isFinite(numericSpeed)) {
                content.addProperty("speed", numericSpeed);
                return;
            }
        } catch (NumberFormatException ignored) {
            // not numeric: fall through and send the configured text as it is
        }
        content.addProperty("speed", speed);
    }
}

View File

@ -0,0 +1,61 @@
/**
* Copyright (c) 2010-2024 Contributors to the openHAB project
*
* See the NOTICE file(s) distributed with this work for additional
* information.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.openhab.voice.openaitts.internal;
import java.util.Locale;
import org.eclipse.jdt.annotation.NonNullByDefault;
import org.openhab.core.voice.Voice;
/**
 * A voice of the OpenAI TTS service, identified by the name it was created with.
 *
 * @author Artur Fedjukevits - Initial contribution
 */
@NonNullByDefault
public class OpenAITTSVoice implements Voice {

    /** Prefix placed in front of the voice name to form the UID. */
    private static final String UID_PREFIX = "openaitts:";

    private final String label;

    /**
     * Creates a voice for the given name.
     *
     * @param label the voice name as passed in by the creator of this voice
     */
    public OpenAITTSVoice(String label) {
        this.label = label;
    }

    /**
     * Returns the unique identifier of the voice: the service prefix followed by the voice name.
     *
     * @return The unique identifier of the voice
     */
    @Override
    public String getUID() {
        return UID_PREFIX + label;
    }

    /**
     * Returns the display label of the voice: the voice name with its first character upper-cased.
     *
     * @return The voice label
     */
    @Override
    public String getLabel() {
        char initial = Character.toUpperCase(label.charAt(0));
        String remainder = label.substring(1);
        return initial + remainder;
    }

    /**
     * Returns the locale reported for the voice, which is always English.
     *
     * @return The locale of the voice
     */
    @Override
    public Locale getLocale() {
        return Locale.ENGLISH;
    }
}

View File

@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<addon:addon id="openaitts" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:addon="https://openhab.org/schemas/addon/v1.0.0"
xsi:schemaLocation="https://openhab.org/schemas/addon/v1.0.0 https://openhab.org/schemas/addon-1.0.0.xsd">
<type>voice</type>
<name>OpenAI Text-to-Speech</name>
<description>OpenAI TTS Service provides text-to-speech capabilities for openHAB.</description>
<connection>cloud</connection>
<service-id>org.openhab.voice.openaitts</service-id>
<config-description-ref uri="voice:openaitts"/>
</addon:addon>

View File

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<config-description:config-descriptions
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:config-description="https://openhab.org/schemas/config-description/v1.0.0"
xsi:schemaLocation="https://openhab.org/schemas/config-description/v1.0.0
https://openhab.org/schemas/config-description-1.0.0.xsd">
<config-description uri="voice:openaitts">
<parameter-group name="authentication">
<label>Authentication</label>
<description>Authentication for connecting to OpenAI API.</description>
</parameter-group>
<parameter-group name="tts">
<label>TTS Configuration</label>
<description>Configure Text to Speech.</description>
</parameter-group>
<parameter name="apiKey" type="text" required="true" groupName="authentication">
<label>API Key</label>
<required>true</required>
<description>OpenAI API key.</description>
<context>password</context>
</parameter>
<parameter name="apiUrl" type="text" required="true" groupName="authentication">
<label>API URL</label>
<required>true</required>
<description>TTS host API URL.</description>
<default>https://api.openai.com/v1/audio/speech</default>
</parameter>
<parameter name="model" type="text" required="true" groupName="tts">
<label>Model</label>
<required>true</required>
<description>ID of the model to use.</description>
<options>
<option value="tts-1">tts-1</option>
<option value="tts-1-hd">tts-1-hd</option>
</options>
<limitToOptions>false</limitToOptions>
<default>tts-1</default>
</parameter>
<parameter name="speed" type="decimal" min="0.25" max="4" groupName="tts">
<label>Speed</label>
<description>The speed of the generated audio. Select a value from 0.25 to 4.0.</description>
<default>1.0</default>
</parameter>
</config-description>
</config-description:config-descriptions>

View File

@ -0,0 +1,17 @@
# add-on
addon.openaitts.name = OpenAI Text-to-Speech
addon.openaitts.description = OpenAI TTS Service provides text-to-speech capabilities for openHAB.
voice.config.openaitts.apiKey.label = API Key
voice.config.openaitts.apiKey.description = OpenAI API key.
voice.config.openaitts.apiUrl.label = API URL
voice.config.openaitts.apiUrl.description = TTS host API URL.
voice.config.openaitts.group.authentication.label = Authentication
voice.config.openaitts.group.authentication.description = Authentication for connecting to OpenAI API.
voice.config.openaitts.group.tts.label = TTS Configuration
voice.config.openaitts.group.tts.description = Configure Text to Speech.
voice.config.openaitts.model.label = Model
voice.config.openaitts.model.description = ID of the model to use.
voice.config.openaitts.speed.label = Speed
voice.config.openaitts.speed.description = The speed of the generated audio. Select a value from 0.25 to 4.0.

View File

@ -483,6 +483,7 @@
<module>org.openhab.voice.mactts</module>
<module>org.openhab.voice.marytts</module>
<module>org.openhab.voice.mimictts</module>
<module>org.openhab.voice.openaitts</module>
<module>org.openhab.voice.picotts</module>
<module>org.openhab.voice.pipertts</module>
<module>org.openhab.voice.pollytts</module>