Ensure `tts` CLI tool readme and usage help are in sync

pull/2993/head
Aarni Koskela 2023-09-25 15:03:51 +03:00
parent 0b95b88f13
commit 5c047cf304
4 changed files with 236 additions and 142 deletions

View File

@ -294,6 +294,15 @@ api.tts_with_vc_to_file(
``` ```
### Command-line `tts` ### Command-line `tts`
<!-- begin-tts-readme -->
Synthesize speech on command line.
You can either use your trained model or choose a model from the provided list.
If you don't specify any models, then it uses LJSpeech based English model.
#### Single Speaker Models #### Single Speaker Models
- List provided models: - List provided models:
@ -301,31 +310,37 @@ api.tts_with_vc_to_file(
``` ```
$ tts --list_models $ tts --list_models
``` ```
- Get model info (for both tts_models and vocoder_models): - Get model info (for both tts_models and vocoder_models):
- Query by type/name: - Query by type/name:
The model_info_by_name uses the name as it from the --list_models. The model_info_by_name uses the name as it from the --list_models.
``` ```
$ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>" $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
``` ```
For example: For example:
``` ```
$ tts --model_info_by_name tts_models/tr/common-voice/glow-tts $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts
```
```
$ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2
``` ```
- Query by type/idx: - Query by type/idx:
The model_query_idx uses the corresponding idx from --list_models. The model_query_idx uses the corresponding idx from --list_models.
``` ```
$ tts --model_info_by_idx "<model_type>/<model_query_idx>" $ tts --model_info_by_idx "<model_type>/<model_query_idx>"
``` ```
For example: For example:
``` ```
$ tts --model_info_by_idx tts_models/3 $ tts --model_info_by_idx tts_models/3
``` ```
- Query info for model info by full name:
```
$ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
```
- Run TTS with default models: - Run TTS with default models:
``` ```
@ -337,6 +352,7 @@ api.tts_with_vc_to_file(
``` ```
$ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
``` ```
For example: For example:
``` ```
@ -355,7 +371,6 @@ api.tts_with_vc_to_file(
$ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav
``` ```
- Run your own TTS model (Using Griffin-Lim Vocoder): - Run your own TTS model (Using Griffin-Lim Vocoder):
``` ```
@ -363,6 +378,7 @@ api.tts_with_vc_to_file(
``` ```
- Run your own TTS and Vocoder models: - Run your own TTS and Vocoder models:
``` ```
$ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
--vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
@ -388,6 +404,14 @@ api.tts_with_vc_to_file(
$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id> $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
``` ```
### Voice Conversion Models
```
$ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav>
```
<!-- end-tts-readme -->
## Directory Structure ## Directory Structure
``` ```
|- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.) |- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.)

View File

@ -12,27 +12,14 @@ from TTS.api import TTS
from TTS.utils.manage import ModelManager from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer from TTS.utils.synthesizer import Synthesizer
description = """
def str2bool(v): Synthesize speech on command line.
if isinstance(v, bool):
return v
if v.lower() in ("yes", "true", "t", "y", "1"):
return True
if v.lower() in ("no", "false", "f", "n", "0"):
return False
raise argparse.ArgumentTypeError("Boolean value expected.")
def main():
description = """Synthesize speech on command line.
You can either use your trained model or choose a model from the provided list. You can either use your trained model or choose a model from the provided list.
If you don't specify any models, then it uses LJSpeech based English model. If you don't specify any models, then it uses LJSpeech based English model.
## Example Runs #### Single Speaker Models
### Single Speaker Models
- List provided models: - List provided models:
@ -40,14 +27,32 @@ If you don't specify any models, then it uses LJSpeech based English model.
$ tts --list_models $ tts --list_models
``` ```
- Query info for model info by idx: - Get model info (for both tts_models and vocoder_models):
- Query by type/name:
The model_info_by_name uses the name as it from the --list_models.
```
$ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
```
For example:
```
$ tts --model_info_by_name tts_models/tr/common-voice/glow-tts
$ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2
```
- Query by type/idx:
The model_query_idx uses the corresponding idx from --list_models.
``` ```
$ tts --model_info_by_idx "<model_type>/<model_query_idx>" $ tts --model_info_by_idx "<model_type>/<model_query_idx>"
``` ```
- Query info for model info by full name: For example:
```
$ tts --model_info_by_idx tts_models/3
```
- Query info for model info by full name:
``` ```
$ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>" $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
``` ```
@ -55,19 +60,31 @@ If you don't specify any models, then it uses LJSpeech based English model.
- Run TTS with default models: - Run TTS with default models:
``` ```
$ tts --text "Text for TTS" $ tts --text "Text for TTS" --out_path output/path/speech.wav
``` ```
- Run a TTS model with its default vocoder model: - Run a TTS model with its default vocoder model:
``` ```
$ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name> $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
```
For example:
```
$ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav
``` ```
- Run with specific TTS and vocoder models from the list: - Run with specific TTS and vocoder models from the list:
``` ```
$ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --output_path $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
```
For example:
```
$ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav
``` ```
- Run your own TTS model (Using Griffin-Lim Vocoder): - Run your own TTS model (Using Griffin-Lim Vocoder):
@ -77,14 +94,15 @@ If you don't specify any models, then it uses LJSpeech based English model.
``` ```
- Run your own TTS and Vocoder models: - Run your own TTS and Vocoder models:
``` ```
$ tts --text "Text for TTS" --model_path path/to/config.json --config_path path/to/model.pth --out_path output/path/speech.wav $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
--vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
``` ```
### Multi-speaker Models #### Multi-speaker Models
- List the available speakers and choose as <speaker_id> among them: - List the available speakers and choose a <speaker_id> among them:
``` ```
$ tts --model_name "<language>/<dataset>/<model_name>" --list_speaker_idxs $ tts --model_name "<language>/<dataset>/<model_name>" --list_speaker_idxs
@ -99,7 +117,7 @@ If you don't specify any models, then it uses LJSpeech based English model.
- Run your own multi-speaker TTS model: - Run your own multi-speaker TTS model:
``` ```
$ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/config.json --config_path path/to/model.pth --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id> $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
``` ```
### Voice Conversion Models ### Voice Conversion Models
@ -108,8 +126,19 @@ If you don't specify any models, then it uses LJSpeech based English model.
$ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav> $ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav>
``` ```
""" """
# We remove Markdown code formatting programmatically here to allow us to copy-and-paste from main README to keep
# documentation in sync more easily.
def str2bool(v):
    """Convert a command-line value to a bool.

    Real ``bool`` values are returned untouched. Otherwise the value is
    lower-cased and matched against the accepted true/false spellings.

    Raises:
        argparse.ArgumentTypeError: if the value matches neither group.
    """
    if isinstance(v, bool):
        return v

    lowered = v.lower()
    spellings_by_result = (
        (True, ("yes", "true", "t", "y", "1")),
        (False, ("no", "false", "f", "n", "0")),
    )
    for result, spellings in spellings_by_result:
        if lowered in spellings:
            return result

    raise argparse.ArgumentTypeError("Boolean value expected.")
def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=description.replace(" ```\n", ""), description=description.replace(" ```\n", ""),
formatter_class=RawTextHelpFormatter, formatter_class=RawTextHelpFormatter,

32
scripts/sync_readme.py Normal file
View File

@ -0,0 +1,32 @@
import argparse
from pathlib import Path
def replace_between_markers(content: str, marker: str, replacement: str) -> str:
    """Return ``content`` with the text between two HTML comment markers replaced.

    The section is delimited by ``<!-- begin-{marker} -->`` followed by a blank
    line, and a blank line followed by ``<!-- end-{marker} -->`` and a newline.
    Both markers and their surrounding blank lines are kept; only the text
    between them is swapped for ``replacement``.

    Args:
        content: The full document text.
        marker: The marker name used in the begin/end comments.
        replacement: The text to place between the markers.

    Returns:
        The document with the marked section replaced.

    Raises:
        ValueError: if the begin marker is missing, or no end marker follows it.
    """
    begin_tag = f"<!-- begin-{marker} -->"
    start_marker = begin_tag + "\n\n"
    end_marker = f"\n\n<!-- end-{marker} -->\n"

    begin_pos = content.index(start_marker)
    start_index = begin_pos + len(start_marker)
    # Look for the end marker only after the begin tag, so an end marker that
    # happens to appear earlier in the document is never picked up. The search
    # starts right after the tag (not after its trailing blank line) because
    # an empty section shares that blank line with the end marker.
    end_index = content.index(end_marker, begin_pos + len(begin_tag))
    return content[:start_index] + replacement + content[end_index:]
def sync_readme():
    """Sync the ``tts-readme`` section of README.md with the ``tts`` CLI description.

    The section between the ``tts-readme`` markers in README.md is replaced
    with the stripped ``description`` string from ``TTS.bin.synthesize``.

    With ``--check`` nothing is written: the process exits with status 42 if
    README.md differs from the regenerated text, and returns normally if it
    is already in sync.
    """
    import sys  # local import so this block does not depend on the file's import list

    ap = argparse.ArgumentParser()
    ap.add_argument(
        "--check",
        action="store_true",
        default=False,
        help="only verify that README.md is in sync; do not modify it",
    )
    args = ap.parse_args()

    readme_path = Path(__file__).parent.parent / "README.md"
    # Explicit encoding so the result does not depend on the platform default.
    orig_content = readme_path.read_text(encoding="utf-8")

    # Imported here, after argument parsing, so `--help` does not import TTS.
    from TTS.bin.synthesize import description

    new_content = replace_between_markers(orig_content, "tts-readme", description.strip())

    if args.check:
        if orig_content != new_content:
            print(
                "README.md is out of sync; please edit the description in "
                "TTS/bin/synthesize.py and run scripts/sync_readme.py"
            )
            sys.exit(42)
        # A check must never touch the file, even when the content is identical.
        print("README.md is up to date")
        return

    readme_path.write_text(new_content, encoding="utf-8")
    print("Updated README.md")


if __name__ == "__main__":
    sync_readme()

View File

@ -0,0 +1,9 @@
import subprocess
import sys
from pathlib import Path
def test_readme_up_to_date():
    """Run ``scripts/sync_readme.py --check`` from the repository root."""
    repo_root = Path(__file__).parent.parent.parent
    script_path = repo_root / "scripts" / "sync_readme.py"
    command = [sys.executable, str(script_path), "--check"]
    # check_call raises CalledProcessError when the script exits non-zero.
    subprocess.check_call(command, cwd=repo_root)