diff --git a/TTS/api.py b/TTS/api.py
index 554fec80..13f71c84 100644
--- a/TTS/api.py
+++ b/TTS/api.py
@@ -342,7 +342,9 @@ class TTS:
     def download_model_by_name(self, model_name: str):
         model_path, config_path, model_item = self.manager.download_model(model_name)
-        if model_path.split("--")[-1] == "tortoise-v2":
+        if isinstance(model_item["github_rls_url"], list):
+            # return model directory if there are multiple files
+            # we assume that the model knows how to load itself
             return None, None, None, None, model_path
         if model_item.get("default_vocoder") is None:
             return model_path, config_path, None, None
diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py
index 16808f9b..3403d8a2 100644
--- a/TTS/tts/models/tortoise.py
+++ b/TTS/tts/models/tortoise.py
@@ -450,7 +450,7 @@ class Tortoise(BaseTTS):
         with torch.no_grad():
             return self.rlg_auto(torch.tensor([0.0])), self.rlg_diffusion(torch.tensor([0.0]))
 
-    def synthesis(self, text, config, speaker_id="lj", **kwargs):
+    def synthesize(self, text, config, speaker_id="lj", **kwargs):
         voice_samples, conditioning_latents = load_voice(speaker_id)
 
         outputs = self.inference_with_config(
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
index 8b50e11d..a7a68eb2 100644
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -333,15 +333,15 @@ class Synthesizer(object):
             )
 
         # compute a new d_vector from the given clip.
-        if speaker_wav is not None:
+        if speaker_wav is not None and self.tts_model.speaker_manager is not None:
             speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav)
 
         use_gl = self.vocoder_model is None
 
         if not reference_wav:
             for sen in sens:
-                if self.tts_config.model == "tortoise":
-                    outputs = self.tts_model.synthesis(text=sen, config=self.tts_config, **kwargs)
+                if hasattr(self.tts_model, "synthesize"):
+                    outputs = self.tts_model.synthesize(text=sen, config=self.tts_config, **kwargs)
                 else:
                     # synthesize voice
                     outputs = synthesis(