mirror of https://github.com/coqui-ai/TTS.git
Update README with new device API (#2876)
* docs: update readme w/ .to(device) api
* docs: add .to(device) in python quickstart
* docs: move section header out of comment
* chore: use device instead of hard-coded string
* docs: update inference.md

pull/2899/head
parent
c0b5e61749
commit
fa0cbd78fe
21
README.md
21
README.md
|
@ -187,18 +187,21 @@ More details about the docker images (like GPU support) can be found [here](http
|
|||
|
||||
### 🐍 Python API
|
||||
|
||||
#### Running a multi-speaker and multi-lingual model
|
||||
|
||||
```python
|
||||
import torch
|
||||
from TTS.api import TTS
|
||||
|
||||
# Running a multi-speaker and multi-lingual model
|
||||
# Get device
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
# List available 🐸TTS models and choose the first one
|
||||
model_name = TTS.list_models()[0]
|
||||
model_name = TTS().list_models()[0]
|
||||
# Init TTS
|
||||
tts = TTS(model_name)
|
||||
tts = TTS(model_name).to(device)
|
||||
|
||||
# Run TTS
|
||||
|
||||
# ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language
|
||||
# Text to speech with a numpy output
|
||||
wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
|
||||
|
@ -210,13 +213,13 @@ tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.langu
|
|||
|
||||
```python
|
||||
# Init TTS with the target model name
|
||||
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
|
||||
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False).to(device)
|
||||
|
||||
# Run TTS
|
||||
tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
|
||||
|
||||
# Example voice cloning with YourTTS in English, French and Portuguese
|
||||
|
||||
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
|
||||
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
|
||||
tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
|
||||
tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr-fr", file_path="output.wav")
|
||||
tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt-br", file_path="output.wav")
|
||||
|
@ -227,7 +230,7 @@ tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav",
|
|||
Converting the voice in `source_wav` to the voice of `target_wav`
|
||||
|
||||
```python
|
||||
tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True)
|
||||
tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda")
|
||||
tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav")
|
||||
```
|
||||
|
||||
|
@ -256,7 +259,7 @@ These models will follow the naming convention `coqui_studio/en/<studio_speaker_
|
|||
# XTTS model
|
||||
models = TTS(cs_api_model="XTTS").list_models()
|
||||
# Init TTS with the target studio speaker
|
||||
tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False, gpu=False)
|
||||
tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False)
|
||||
# Run TTS
|
||||
tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH)
|
||||
|
||||
|
|
|
@ -117,7 +117,7 @@ You can run a multi-speaker and multi-lingual model in Python as
|
|||
from TTS.api import TTS
|
||||
|
||||
# List available 🐸TTS models and choose the first one
|
||||
model_name = TTS.list_models()[0]
|
||||
model_name = TTS().list_models()[0]
|
||||
# Init TTS
|
||||
tts = TTS(model_name)
|
||||
# Run TTS
|
||||
|
@ -132,7 +132,7 @@ tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.langu
|
|||
|
||||
```python
|
||||
# Init TTS with the target model name
|
||||
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
|
||||
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False)
|
||||
# Run TTS
|
||||
tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
|
||||
```
|
||||
|
@ -140,7 +140,7 @@ tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
|
|||
#### Example voice cloning with YourTTS in English, French and Portuguese:
|
||||
|
||||
```python
|
||||
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
|
||||
tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda")
|
||||
tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
|
||||
tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="output.wav")
|
||||
tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="output.wav")
|
||||
|
@ -149,7 +149,7 @@ tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav",
|
|||
#### Example voice conversion converting speaker of the `source_wav` to the speaker of the `target_wav`
|
||||
|
||||
```python
|
||||
tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False, gpu=True)
|
||||
tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda")
|
||||
tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav")
|
||||
```
|
||||
|
||||
|
@ -177,7 +177,7 @@ You should set the `COQUI_STUDIO_TOKEN` environment variable to use the API toke
|
|||
# The name format is coqui_studio/en/<studio_speaker_name>/coqui_studio
|
||||
models = TTS().list_models()
|
||||
# Init TTS with the target studio speaker
|
||||
tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False, gpu=False)
|
||||
tts = TTS(model_name="coqui_studio/en/Torcull Diarmuid/coqui_studio", progress_bar=False)
|
||||
# Run TTS
|
||||
tts.tts_to_file(text="This is a test.", file_path=OUTPUT_PATH)
|
||||
# Run TTS with emotion and speed control
|
||||
|
@ -222,7 +222,7 @@ You can find the list of language ISO codes [here](https://dl.fbaipublicfiles.co
|
|||
|
||||
```python
|
||||
from TTS.api import TTS
|
||||
api = TTS(model_name="tts_models/eng/fairseq/vits", gpu=True)
|
||||
api = TTS(model_name="tts_models/eng/fairseq/vits").to("cuda")
|
||||
api.tts_to_file("This is a test.", file_path="output.wav")
|
||||
|
||||
# TTS with on the fly voice conversion
|
||||
|
|
Loading…
Reference in New Issue