Merge pull request #217 from idiap/stdout

fix(bin): log to stdout in cli tools
pull/4115/head^2
Enno Hermann 2024-12-17 14:22:46 +01:00 committed by GitHub
commit 370fb1da81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 56 additions and 28 deletions

View File

@ -2,6 +2,7 @@ import argparse
import importlib
import logging
import os
import sys
from argparse import RawTextHelpFormatter
import numpy as np
@ -18,7 +19,7 @@ from TTS.utils.audio import AudioProcessor
from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(

View File

@ -1,6 +1,7 @@
import argparse
import logging
import os
import sys
from argparse import RawTextHelpFormatter
import torch
@ -102,7 +103,7 @@ def compute_embeddings(
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser(
description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n"""

View File

@ -5,6 +5,7 @@ import argparse
import glob
import logging
import os
import sys
import numpy as np
from tqdm import tqdm
@ -18,7 +19,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
def main():
"""Run preprocessing process."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.")
parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.")

View File

@ -1,5 +1,6 @@
import argparse
import logging
import sys
from argparse import RawTextHelpFormatter
import torch
@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager):
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser(
description="""Compute the accuracy of the encoder.\n\n"""

View File

@ -4,6 +4,7 @@
import argparse
import logging
import os
import sys
import numpy as np
import torch
@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser()
parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True)

View File

@ -2,6 +2,7 @@
import argparse
import logging
import sys
from argparse import RawTextHelpFormatter
from TTS.config import load_config
@ -10,7 +11,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger
def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
# pylint: disable=bad-option-value
parser = argparse.ArgumentParser(

View File

@ -3,6 +3,7 @@
import argparse
import logging
import multiprocessing
import sys
from argparse import RawTextHelpFormatter
from tqdm.contrib.concurrent import process_map
@ -20,7 +21,7 @@ def compute_phonemes(item):
def main():
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
# pylint: disable=W0601
global c, phonemizer

View File

@ -4,6 +4,7 @@ import logging
import multiprocessing
import os
import pathlib
import sys
import torch
from tqdm import tqdm
@ -77,7 +78,7 @@ def preprocess_audios():
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser(
description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end"

View File

@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace:
def main() -> None:
"""Entry point for `tts` command line interface."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
args = parse_args()
stream = sys.stderr if args.pipe_out else sys.stdout
setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter())
pipe_out = sys.stdout if args.pipe_out else None

View File

@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training()

View File

@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field
from trainer import Trainer, TrainerArgs
@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs):
def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
# init trainer args
train_args = TrainTTSArgs()

View File

@ -1,5 +1,6 @@
import logging
import os
import sys
from dataclasses import dataclass, field
from trainer import Trainer, TrainerArgs
@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs):
def main():
"""Run `tts` model training directly by a `config.json` file."""
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
# init trainer args
train_args = TrainVocoderArgs()

View File

@ -2,6 +2,7 @@
import argparse
import logging
import sys
from itertools import product as cartesian_product
import numpy as np
@ -17,7 +18,7 @@ from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset
from TTS.vocoder.models import setup_model
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, help="Path to model checkpoint.")

View File

@ -216,7 +216,7 @@ def processor(directory, subset, force_process):
if __name__ == "__main__":
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
if len(sys.argv) != 4:
print("Usage: python prepare_data.py save_directory user password")
sys.exit()

View File

@ -25,7 +25,7 @@ from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
logger = logging.getLogger(__name__)
setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter())
setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter())
def create_argparser() -> argparse.ArgumentParser:

View File

@ -2,9 +2,10 @@
import datetime
import importlib
import logging
import os
import re
from pathlib import Path
from typing import Any, Callable, Dict, Optional, TypeVar, Union
from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union
import torch
from packaging.version import Version
@ -107,25 +108,34 @@ def setup_logger(
level: int = logging.INFO,
*,
formatter: Optional[logging.Formatter] = None,
screen: bool = False,
tofile: bool = False,
log_dir: str = "logs",
stream: Optional[TextIO] = None,
log_dir: Optional[Union[str, os.PathLike[Any]]] = None,
log_name: str = "log",
) -> None:
"""Set up a logger.
Args:
logger_name: Name of the logger to set up
level: Logging level
formatter: Formatter for the logger
stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout
log_dir: Folder to write the log file (no file created if None)
log_name: Prefix of the log file name
"""
lg = logging.getLogger(logger_name)
if formatter is None:
formatter = logging.Formatter(
"%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S"
)
lg.setLevel(level)
if tofile:
if log_dir is not None:
Path(log_dir).mkdir(exist_ok=True, parents=True)
log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log"
fh = logging.FileHandler(log_file, mode="w")
fh.setFormatter(formatter)
lg.addHandler(fh)
if screen:
sh = logging.StreamHandler()
if stream is not None:
sh = logging.StreamHandler(stream)
sh.setFormatter(formatter)
lg.addHandler(sh)

View File

@ -163,12 +163,13 @@ from TTS.api import TTS
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")
# generate speech by cloning a voice using default settings
tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
tts.tts_to_file(
text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.",
file_path="output.wav",
speaker="Ana Florence",
language="en",
split_sentences=True
)
```
@ -230,6 +231,11 @@ out = model.inference(
torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000)
```
You can also use the Coqui speakers:
```python
gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values()
```
#### Streaming manually