mirror of https://github.com/coqui-ai/TTS.git
commit
370fb1da81
|
@ -2,6 +2,7 @@ import argparse
|
|||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
import numpy as np
|
||||
|
@ -18,7 +19,7 @@ from TTS.utils.audio import AudioProcessor
|
|||
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
# pylint: disable=bad-option-value
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
import torch
|
||||
|
@ -102,7 +103,7 @@ def compute_embeddings(
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""
|
||||
|
|
|
@ -5,6 +5,7 @@ import argparse
|
|||
import glob
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
@ -18,7 +19,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
|||
|
||||
def main():
|
||||
"""Run preprocessing process."""
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stderr, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
|
||||
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
import torch
|
||||
|
@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="""Compute the accuracy of the encoder.\n\n"""
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
from TTS.config import load_config
|
||||
|
@ -10,7 +11,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
|
|||
|
||||
|
||||
def main():
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
# pylint: disable=bad-option-value
|
||||
parser = argparse.ArgumentParser(
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
import argparse
|
||||
import logging
|
||||
import multiprocessing
|
||||
import sys
|
||||
from argparse import RawTextHelpFormatter
|
||||
|
||||
from tqdm.contrib.concurrent import process_map
|
||||
|
@ -20,7 +21,7 @@ def compute_phonemes(item):
|
|||
|
||||
|
||||
def main():
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
# pylint: disable=W0601
|
||||
global c, phonemizer
|
||||
|
|
|
@ -4,6 +4,7 @@ import logging
|
|||
import multiprocessing
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
@ -77,7 +78,7 @@ def preprocess_audios():
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"
|
||||
|
|
|
@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace:
|
|||
|
||||
def main() -> None:
|
||||
"""Entry point for `tts` command line interface."""
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
args = parse_args()
|
||||
stream = sys.stderr if args.pipe_out else sys.stdout
|
||||
setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter())
|
||||
|
||||
pipe_out = sys.stdout if args.pipe_out else None
|
||||
|
||||
|
|
|
@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs):
|
|||
|
||||
def main():
|
||||
"""Run `tts` model training directly by a `config.json` file."""
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
# init trainer args
|
||||
train_args = TrainTTSArgs()
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from trainer import Trainer, TrainerArgs
|
||||
|
@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs):
|
|||
|
||||
def main():
|
||||
"""Run `tts` model training directly by a `config.json` file."""
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
# init trainer args
|
||||
train_args = TrainVocoderArgs()
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import argparse
|
||||
import logging
|
||||
import sys
|
||||
from itertools import product as cartesian_product
|
||||
|
||||
import numpy as np
|
||||
|
@ -17,7 +18,7 @@ from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
|
|||
from TTS.vocoder.models import setup_model
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")
|
||||
|
|
|
@ -216,7 +216,7 @@ def processor(directory, subset, force_process):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
if len(sys.argv) != 4:
|
||||
print("Usage: python prepare_data.py save_directory user password")
|
||||
sys.exit()
|
||||
|
|
|
@ -25,7 +25,7 @@ from TTS.utils.manage import ModelManager
|
|||
from TTS.utils.synthesizer import Synthesizer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
|
||||
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
|
||||
|
||||
|
||||
def create_argparser() -> argparse.ArgumentParser:
|
||||
|
|
|
@ -2,9 +2,10 @@
|
|||
import datetime
|
||||
import importlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional, TypeVar, Union
|
||||
from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union
|
||||
|
||||
import torch
|
||||
from packaging.version import Version
|
||||
|
@ -107,25 +108,34 @@ def setup_logger(
|
|||
level: int = logging.INFO,
|
||||
*,
|
||||
formatter: Optional[logging.Formatter] = None,
|
||||
screen: bool = False,
|
||||
tofile: bool = False,
|
||||
log_dir: str = "logs",
|
||||
stream: Optional[TextIO] = None,
|
||||
log_dir: Optional[Union[str, os.PathLike[Any]]] = None,
|
||||
log_name: str = "log",
|
||||
) -> None:
|
||||
"""Set up a logger.
|
||||
|
||||
Args:
|
||||
logger_name: Name of the logger to set up
|
||||
level: Logging level
|
||||
formatter: Formatter for the logger
|
||||
stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout
|
||||
log_dir: Folder to write the log file (no file created if None)
|
||||
log_name: Prefix of the log file name
|
||||
"""
|
||||
lg = logging.getLogger(logger_name)
|
||||
if formatter is None:
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
|
||||
)
|
||||
lg.setLevel(level)
|
||||
if tofile:
|
||||
if log_dir is not None:
|
||||
Path(log_dir).mkdir(exist_ok=True, parents=True)
|
||||
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
|
||||
fh = logging.FileHandler(log_file, mode="w")
|
||||
fh.setFormatter(formatter)
|
||||
lg.addHandler(fh)
|
||||
if screen:
|
||||
sh = logging.StreamHandler()
|
||||
if stream is not None:
|
||||
sh = logging.StreamHandler(stream)
|
||||
sh.setFormatter(formatter)
|
||||
lg.addHandler(sh)
|
||||
|
||||
|
|
|
@ -163,12 +163,13 @@ from TTS.api import TTS
|
|||
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
|
||||
|
||||
# generate speech by cloning a voice using default settings
|
||||
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
||||
file_path="output.wav",
|
||||
speaker="Ana Florence",
|
||||
language="en",
|
||||
split_sentences=True
|
||||
)
|
||||
tts.tts_to_file(
|
||||
text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
|
||||
file_path="output.wav",
|
||||
speaker="Ana Florence",
|
||||
language="en",
|
||||
split_sentences=True
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
@ -230,6 +231,11 @@ out = model.inference(
|
|||
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
|
||||
```
|
||||
|
||||
You can also use the Coqui speakers:
|
||||
|
||||
```python
|
||||
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
|
||||
```
|
||||
|
||||
#### Streaming manually
|
||||
|
||||
|
|
Loading…
Reference in New Issue