Use `synthesize` when it exists

pull/2592/head
Eren Gölge 2023-05-06 11:58:59 +02:00
parent 90b33e398f
commit a4860eac9a
3 changed files with 7 additions and 5 deletions

View File

@ -342,7 +342,9 @@ class TTS:
def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name)
if model_path.split("--")[-1] == "tortoise-v2":
if isinstance(model_item["github_rls_url"], list):
# return model directory if there are multiple files
# we assume that the model knows how to load itself
return None, None, None, None, model_path
if model_item.get("default_vocoder") is None:
return model_path, config_path, None, None

View File

@ -450,7 +450,7 @@ class Tortoise(BaseTTS):
with torch.no_grad():
return self.rlg_auto(torch.tensor([0.0])), self.rlg_diffusion(torch.tensor([0.0]))
def synthesis(self, text, config, speaker_id="lj", **kwargs):
def synthesize(self, text, config, speaker_id="lj", **kwargs):
voice_samples, conditioning_latents = load_voice(speaker_id)
outputs = self.inference_with_config(

View File

@ -333,15 +333,15 @@ class Synthesizer(object):
)
# compute a new d_vector from the given clip.
if speaker_wav is not None:
if speaker_wav is not None and self.tts_model.speaker_manager is not None:
speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav)
use_gl = self.vocoder_model is None
if not reference_wav:
for sen in sens:
if self.tts_config.model == "tortoise":
outputs = self.tts_model.synthesis(text=sen, config=self.tts_config, **kwargs)
if hasattr(self.tts_model, "synthesize"):
outputs = self.tts_model.synthesize(text=sen, config=self.tts_config, **kwargs)
else:
# synthesize voice
outputs = synthesis(