mirror of https://github.com/coqui-ai/TTS.git
Merge pull request #269 from mozilla/fix-install-yet-another-time
Fix installation by using an explicit symlink
pull/10/head
commit
234b44db9b
|
@ -11,5 +11,7 @@ fi
|
|||
|
||||
if [[ "$TEST_SUITE" == "unittest" ]]; then
|
||||
# Run tests on all pushes
|
||||
pushd tts_namespace
|
||||
python -m unittest
|
||||
popd
|
||||
fi
|
||||
|
|
|
@ -5,8 +5,8 @@ import torch
|
|||
import random
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
from utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||
from utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||
from TTS.utils.text import text_to_sequence, phoneme_to_sequence, pad_with_eos_bos
|
||||
from TTS.utils.data import prepare_data, prepare_tensor, prepare_stop_target
|
||||
|
||||
|
||||
class MyDataset(Dataset):
|
||||
|
|
|
@ -9,7 +9,7 @@ import torch.distributed as dist
|
|||
from torch.utils.data.sampler import Sampler
|
||||
from torch.autograd import Variable
|
||||
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
|
||||
from utils.generic_utils import load_config, create_experiment_folder
|
||||
from TTS.utils.generic_utils import load_config, create_experiment_folder
|
||||
|
||||
|
||||
class DistributedSampler(Sampler):
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from torch import nn
|
||||
from torch.nn import functional
|
||||
from utils.generic_utils import sequence_mask
|
||||
from TTS.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class L1LossMasked(nn.Module):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
# coding: utf-8
|
||||
from torch import nn
|
||||
from layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from utils.generic_utils import sequence_mask
|
||||
from TTS.layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from TTS.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class Tacotron(nn.Module):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from math import sqrt
|
||||
from torch import nn
|
||||
from layers.tacotron2 import Encoder, Decoder, Postnet
|
||||
from utils.generic_utils import sequence_mask
|
||||
from TTS.layers.tacotron2 import Encoder, Decoder, Postnet
|
||||
from TTS.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
# TODO: match function arguments with tacotron
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# coding: utf-8
|
||||
from torch import nn
|
||||
from layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from layers.gst_layers import GST
|
||||
from utils.generic_utils import sequence_mask
|
||||
from TTS.layers.tacotron import Encoder, Decoder, PostCBHG
|
||||
from TTS.layers.gst_layers import GST
|
||||
from TTS.utils.generic_utils import sequence_mask
|
||||
|
||||
|
||||
class TacotronGST(nn.Module):
|
||||
|
|
|
@ -138,7 +138,7 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# LOAD TTS MODEL\n",
|
||||
"from utils.text.symbols import symbols, phonemes\n",
|
||||
"from TTS.utils.text.symbols import symbols, phonemes\n",
|
||||
"\n",
|
||||
"# multi speaker \n",
|
||||
"if CONFIG.use_speaker_embedding:\n",
|
||||
|
|
|
@ -105,10 +105,10 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from utils.text.symbols import symbols, phonemes\n",
|
||||
"from utils.generic_utils import sequence_mask\n",
|
||||
"from layers.losses import L1LossMasked\n",
|
||||
"from utils.text.symbols import symbols, phonemes\n",
|
||||
"from TTS.utils.text.symbols import symbols, phonemes\n",
|
||||
"from TTS.utils.generic_utils import sequence_mask\n",
|
||||
"from TTS.layers.losses import L1LossMasked\n",
|
||||
"from TTS.utils.text.symbols import symbols, phonemes\n",
|
||||
"\n",
|
||||
"# load the model\n",
|
||||
"num_chars = len(phonemes) if C.use_phonemes else len(symbols)\n",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!flask/bin/python
|
||||
import argparse
|
||||
from synthesizer import Synthesizer
|
||||
from utils.generic_utils import load_config
|
||||
from TTS.utils.generic_utils import load_config
|
||||
from flask import Flask, request, render_template, send_file
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
|
|
@ -5,11 +5,11 @@ import numpy as np
|
|||
import torch
|
||||
import sys
|
||||
|
||||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import load_config, setup_model
|
||||
from utils.text import phonemes, symbols
|
||||
from utils.speakers import load_speaker_mapping
|
||||
from utils.synthesis import *
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.generic_utils import load_config, setup_model
|
||||
from TTS.utils.text import phonemes, symbols
|
||||
from TTS.utils.speakers import load_speaker_mapping
|
||||
from TTS.utils.synthesis import *
|
||||
|
||||
import re
|
||||
alphabets = r"([A-Za-z])"
|
||||
|
|
4
setup.py
4
setup.py
|
@ -62,8 +62,8 @@ setup(
|
|||
version=version,
|
||||
url='https://github.com/mozilla/TTS',
|
||||
description='Text to Speech with Deep Learning',
|
||||
package_dir={'TTS': '.'},
|
||||
packages=['TTS'] + ['TTS.' + pkg for pkg in find_packages()],
|
||||
package_dir={'': 'tts_namespace'},
|
||||
packages=find_packages('tts_namespace'),
|
||||
cmdclass={
|
||||
'build_py': build_py,
|
||||
'develop': develop,
|
||||
|
|
|
@ -4,10 +4,10 @@ import argparse
|
|||
import torch
|
||||
import string
|
||||
|
||||
from utils.synthesis import synthesis
|
||||
from utils.generic_utils import load_config, setup_model
|
||||
from utils.text.symbols import symbols, phonemes
|
||||
from utils.audio import AudioProcessor
|
||||
from TTS.utils.synthesis import synthesis
|
||||
from TTS.utils.generic_utils import load_config, setup_model
|
||||
from TTS.utils.text.symbols import symbols, phonemes
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
|
||||
|
||||
def tts(model,
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import unittest
|
||||
import torch as T
|
||||
|
||||
from utils.generic_utils import save_checkpoint, save_best_model
|
||||
from layers.tacotron import Prenet
|
||||
from TTS.utils.generic_utils import save_checkpoint, save_best_model
|
||||
from TTS.layers.tacotron import Prenet
|
||||
|
||||
OUT_PATH = '/tmp/test.pth.tar'
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"tts_path":"tests/outputs/", // tts model root folder
|
||||
"tts_path":"TTS/tests/outputs/", // tts model root folder
|
||||
"tts_file":"checkpoint_10.pth.tar", // tts checkpoint file
|
||||
"tts_config":"dummy_model_config.json", // tts config.json file
|
||||
"tts_speakers": null, // json file listing speaker ids. null if no speaker embedding.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import unittest
|
||||
|
||||
from utils.text import phonemes
|
||||
from TTS.utils.text import phonemes
|
||||
|
||||
class SymbolsTest(unittest.TestCase):
|
||||
def test_uniqueness(self): #pylint: disable=no-self-use
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import os
|
||||
import unittest
|
||||
|
||||
from tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import load_config
|
||||
from TTS.tests import get_tests_path, get_tests_input_path, get_tests_output_path
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.generic_utils import load_config
|
||||
|
||||
TESTS_PATH = get_tests_path()
|
||||
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
|
||||
|
|
|
@ -3,10 +3,10 @@ import unittest
|
|||
|
||||
import torch as T
|
||||
|
||||
from server.synthesizer import Synthesizer
|
||||
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
|
||||
from utils.text.symbols import phonemes, symbols
|
||||
from utils.generic_utils import load_config, save_checkpoint, setup_model
|
||||
from TTS.server.synthesizer import Synthesizer
|
||||
from TTS.tests import get_tests_input_path, get_tests_output_path
|
||||
from TTS.utils.text.symbols import phonemes, symbols
|
||||
from TTS.utils.generic_utils import load_config, save_checkpoint, setup_model
|
||||
|
||||
|
||||
class DemoServerTest(unittest.TestCase):
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
import unittest
|
||||
import torch as T
|
||||
|
||||
from layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
||||
from layers.losses import L1LossMasked
|
||||
from utils.generic_utils import sequence_mask
|
||||
from TTS.layers.tacotron import Prenet, CBHG, Decoder, Encoder
|
||||
from TTS.layers.losses import L1LossMasked
|
||||
from TTS.utils.generic_utils import sequence_mask
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
|
|
|
@ -5,10 +5,10 @@ import torch
|
|||
import numpy as np
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
from utils.generic_utils import load_config
|
||||
from utils.audio import AudioProcessor
|
||||
from datasets import TTSDataset
|
||||
from datasets.preprocess import ljspeech
|
||||
from TTS.utils.generic_utils import load_config
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.datasets import TTSDataset
|
||||
from TTS.datasets.preprocess import ljspeech
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
import unittest
|
||||
import os
|
||||
from tests import get_tests_input_path
|
||||
from TTS.tests import get_tests_input_path
|
||||
|
||||
from datasets.preprocess import common_voice
|
||||
from TTS.datasets.preprocess import common_voice
|
||||
|
||||
|
||||
class TestPreprocessors(unittest.TestCase):
|
||||
|
|
|
@ -6,9 +6,9 @@ import numpy as np
|
|||
|
||||
from torch import optim
|
||||
from torch import nn
|
||||
from utils.generic_utils import load_config
|
||||
from layers.losses import MSELossMasked
|
||||
from models.tacotron2 import Tacotron2
|
||||
from TTS.utils.generic_utils import load_config
|
||||
from TTS.layers.losses import MSELossMasked
|
||||
from TTS.models.tacotron2 import Tacotron2
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
|
|
|
@ -5,9 +5,9 @@ import unittest
|
|||
|
||||
from torch import optim
|
||||
from torch import nn
|
||||
from utils.generic_utils import load_config
|
||||
from layers.losses import L1LossMasked
|
||||
from models.tacotron import Tacotron
|
||||
from TTS.utils.generic_utils import load_config
|
||||
from TTS.layers.losses import L1LossMasked
|
||||
from TTS.models.tacotron import Tacotron
|
||||
|
||||
#pylint: disable=unused-variable
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import unittest
|
||||
import torch as T
|
||||
|
||||
from utils.text import *
|
||||
from TTS.utils.text import *
|
||||
|
||||
def test_phoneme_to_sequence():
|
||||
text = "Recent research at Harvard has shown meditating for as little as 8 weeks can actually increase, the grey matter in the parts of the brain responsible for emotional regulation and learning!"
|
||||
|
|
30
train.py
30
train.py
|
@ -10,24 +10,24 @@ import torch.nn as nn
|
|||
from torch import optim
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from datasets.TTSDataset import MyDataset
|
||||
from TTS.datasets.TTSDataset import MyDataset
|
||||
from distribute import (DistributedSampler, apply_gradient_allreduce,
|
||||
init_distributed, reduce_tensor)
|
||||
from layers.losses import L1LossMasked, MSELossMasked
|
||||
from utils.audio import AudioProcessor
|
||||
from utils.generic_utils import (NoamLR, check_update, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
load_config, remove_experiment_folder,
|
||||
save_best_model, save_checkpoint, weight_decay,
|
||||
set_init_dict, copy_config_file, setup_model,
|
||||
split_dataset, gradual_training_scheduler)
|
||||
from utils.logger import Logger
|
||||
from utils.speakers import load_speaker_mapping, save_speaker_mapping, \
|
||||
from TTS.layers.losses import L1LossMasked, MSELossMasked
|
||||
from TTS.utils.audio import AudioProcessor
|
||||
from TTS.utils.generic_utils import (NoamLR, check_update, count_parameters,
|
||||
create_experiment_folder, get_git_branch,
|
||||
load_config, remove_experiment_folder,
|
||||
save_best_model, save_checkpoint, weight_decay,
|
||||
set_init_dict, copy_config_file, setup_model,
|
||||
split_dataset, gradual_training_scheduler)
|
||||
from TTS.utils.logger import Logger
|
||||
from TTS.utils.speakers import load_speaker_mapping, save_speaker_mapping, \
|
||||
get_speakers
|
||||
from utils.synthesis import synthesis
|
||||
from utils.text.symbols import phonemes, symbols
|
||||
from utils.visual import plot_alignment, plot_spectrogram
|
||||
from datasets.preprocess import get_preprocessor_by_name
|
||||
from TTS.utils.synthesis import synthesis
|
||||
from TTS.utils.text.symbols import phonemes, symbols
|
||||
from TTS.utils.visual import plot_alignment, plot_spectrogram
|
||||
from TTS.datasets.preprocess import get_preprocessor_by_name
|
||||
|
||||
torch.backends.cudnn.enabled = True
|
||||
torch.backends.cudnn.benchmark = False
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
This folder contains a symlink called TTS to the parent folder:
|
||||
|
||||
lrwxr-xr-x TTS -> ..
|
||||
|
||||
This is used to appease the distribute/setuptools gods. When the project was
|
||||
initially set up, the repository folder itself was considered a namespace, and
|
||||
development was done with `sys.path` hacks. This means if you tried to install
|
||||
TTS, `setup.py` would see the packages `models`, `utils`, `layers`... instead of
|
||||
`TTS.models`, `TTS.utils`...
|
||||
|
||||
Installing TTS would then pollute the package namespace with generic names like
|
||||
those above. In order to make things installable in both install and development
|
||||
modes (`pip install /path/to/TTS` and `pip install -e /path/to/TTS`), we needed
|
||||
to add an additional 'TTS' namespace to avoid this pollution. A virtual redirect
|
||||
using `package_dir` in `setup.py` is not enough because it breaks the editable
|
||||
installation, which can only handle the simplest of `package_dir` redirects.
|
||||
|
||||
Our solution is to use a symlink in order to add the extra `TTS` namespace. In
|
||||
`setup.py`, we only look for packages inside `tts_namespace` (this folder),
|
||||
which contains a symlink called TTS pointing to the repository root. The final
|
||||
result is that `setuptools.find_packages` will find `TTS.models`, `TTS.utils`...
|
||||
|
||||
With this hack, `pip install -e` will then add a link to the `tts_namespace` folder
|
||||
in your `site-packages` folder, which works properly. It's important not to add
|
||||
anything else in this folder because it will pollute the package namespace when
|
||||
installing the project.
|
||||
|
||||
This does not work if you check out your project on a filesystem that does not
|
||||
support symlinks.
|
|
@ -0,0 +1 @@
|
|||
..
|
|
@ -250,7 +250,7 @@ def set_init_dict(model_dict, checkpoint, c):
|
|||
|
||||
def setup_model(num_chars, num_speakers, c):
|
||||
print(" > Using model: {}".format(c.model))
|
||||
MyModel = importlib.import_module('models.' + c.model.lower())
|
||||
MyModel = importlib.import_module('TTS.models.' + c.model.lower())
|
||||
MyModel = getattr(MyModel, c.model)
|
||||
if c.model.lower() in ["tacotron", "tacotrongst"]:
|
||||
model = MyModel(
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
import json
|
||||
|
||||
from datasets.preprocess import get_preprocessor_by_name
|
||||
from TTS.datasets.preprocess import get_preprocessor_by_name
|
||||
|
||||
|
||||
def make_speakers_json_path(out_path):
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
import re
|
||||
import phonemizer
|
||||
from phonemizer.phonemize import phonemize
|
||||
from utils.text import cleaners
|
||||
from utils.text.symbols import symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||
from TTS.utils.text import cleaners
|
||||
from TTS.utils.text.symbols import symbols, phonemes, _phoneme_punctuations, _bos, \
|
||||
_eos
|
||||
|
||||
# Mappings from symbol to numeric ID and vice versa:
|
||||
|
|
|
@ -2,7 +2,7 @@ import librosa
|
|||
import matplotlib
|
||||
matplotlib.use('Agg')
|
||||
import matplotlib.pyplot as plt
|
||||
from utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||
from TTS.utils.text import phoneme_to_sequence, sequence_to_phoneme
|
||||
|
||||
|
||||
def plot_alignment(alignment, info=None):
|
||||
|
|
Loading…
Reference in New Issue