partial model initialization

2018-12-11 17:53:08 +01:00 · 2018-12-11 17:53:08 +01:00 · 8d865629a0
parent 619c73f0f1
commit 8d865629a0
2 changed files with 17 additions and 7 deletions
--- a/layers/tacotron.py
+++ b/layers/tacotron.py
@ -339,11 +339,10 @@ class Decoder(nn.Module):

    def _reshape_memory(self, memory):
        B = memory.shape[0]
-        if memory is not None:
-            # Grouping multiple frames if necessary
-            if memory.size(-1) == self.memory_dim:
-                memory = memory.contiguous()
-                memory = memory.view(B, memory.size(1) // self.r, -1)
+        # Grouping multiple frames if necessary
+        if memory.size(-1) == self.memory_dim:
+            memory = memory.contiguous()
+            memory = memory.view(B, memory.size(1) // self.r, -1)
        # Time first (T_decoder, B, memory_dim)
        memory = memory.transpose(0, 1)
        return memory
@ -370,7 +369,8 @@ class Decoder(nn.Module):
        T = inputs.size(1)
        # Run greedy decoding if memory is None
        greedy = not self.training
-        memory = self._reshape_memory(memory)
+        if memory is not None:
+            memory = self._reshape_memory(memory)
        T_decoder = memory.size(0)
        # go frame as zeros matrix
        initial_memory = inputs.data.new(B, self.memory_dim * self.r).zero_()
@ -461,4 +461,4 @@ class StopNet(nn.Module):
        outputs = self.dropout(inputs)
        outputs = self.linear(outputs)
        outputs = self.sigmoid(outputs)
-        return outputs
+        return outputs
--- a/train.py
+++ b/train.py
@ -401,6 +401,16 @@ def main(args):
    if args.restore_path:
        checkpoint = torch.load(args.restore_path)
        model.load_state_dict(checkpoint['model'])
+        # Partial initialization: checkpoint entries whose keys are not present in the current model are skipped.
+        # 1. filter out unnecessary keys
+        pretrained_dict = {
+            k: v
+            for k, v in checkpoint['model'].items() if k in model_dict
+        }
+        # 2. overwrite entries in the existing state dict
+        model_dict.update(pretrained_dict)
+        # 3. load the new state dict
+        model.load_state_dict(model_dict)
        if use_cuda:
            model = model.cuda()
            criterion.cuda()