mirror of https://github.com/coqui-ai/TTS.git
fix linter
parent 3e9ca4b95d
commit 215a74b32e
@@ -3,7 +3,6 @@ import argparse
 import multiprocessing
 from argparse import RawTextHelpFormatter
 
-import numpy
 from tqdm.contrib.concurrent import process_map
 
 from TTS.config import load_config
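The only change in this hunk appears to be dropping an unused plain `import numpy`, the classic pylint unused-import (W0611) fix. A minimal sketch of the pattern, with a hypothetical module that is not from the TTS codebase:

    import numpy  # pylint W0611 (unused-import): nothing below references numpy

    def scale(values, factor):
        # the list comprehension never touches numpy, so the import is dead code
        return [v * factor for v in values]

Deleting the import (or actually using it) clears the warning.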
@@ -6,12 +6,7 @@ import glob
 import multiprocessing
 import os
 import pathlib
-import sys
 import wave
-from itertools import chain
 
-import numpy as np
-import tqdm
 import webrtcvad
 from tqdm.contrib.concurrent import process_map
-
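For context, the script these hunks touch is built on webrtcvad. A minimal, hedged usage sketch (not from the repo), assuming 16 kHz mono 16-bit PCM and one of the 10/20/30 ms frame sizes the library requires:

    import webrtcvad

    vad = webrtcvad.Vad(2)  # aggressiveness: 0 (least) to 3 (most aggressive)
    sample_rate = 16000
    frame_ms = 30
    num_bytes = int(sample_rate * frame_ms / 1000) * 2  # 16-bit mono samples
    silence = b"\x00" * num_bytes
    print(vad.is_speech(silence, sample_rate))  # typically False for pure silence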
@@ -47,8 +42,8 @@ def write_wave(path, audio, sample_rate):
 class Frame(object):
     """Represents a "frame" of audio data."""
 
-    def __init__(self, bytes, timestamp, duration):
-        self.bytes = bytes
+    def __init__(self, _bytes, timestamp, duration):
+        self.bytes = _bytes
         self.timestamp = timestamp
         self.duration = duration
 
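Renaming the parameter avoids shadowing the built-in `bytes` (pylint W0622, redefined-builtin). The hazard is that inside the method the built-in becomes unreachable. A hedged illustration; the isinstance check is added here for demonstration and is not in the original class:

    class Frame:
        """Represents a "frame" of audio data."""

        def __init__(self, _bytes, timestamp, duration):
            # with the old name `bytes`, this check would compare against the
            # argument rather than the built-in bytes type and raise TypeError
            assert isinstance(_bytes, bytes)
            self.bytes = _bytes
            self.timestamp = timestamp
            self.duration = duration

    frame = Frame(b"\x00\x01", timestamp=0.0, duration=0.03)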
@@ -121,7 +116,7 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
                # We want to yield all the audio we see from now until
                # we are NOTTRIGGERED, but we have to start with the
                # audio that's already in the ring buffer.
-               for f, s in ring_buffer:
+               for f, _ in ring_buffer:
                    voiced_frames.append(f)
                ring_buffer.clear()
        else:
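Replacing `s` with `_` is the usual way to tell pylint (and readers) that the second element of each `(frame, is_speech)` pair is deliberately ignored; a named `s` would otherwise trip unused-variable (W0612). The same idiom in isolation:

    # ring_buffer holds (frame, is_speech) pairs; only the frames are needed
    ring_buffer = [("frame0", True), ("frame1", False)]
    voiced_frames = [f for f, _ in ring_buffer]
    print(voiced_frames)  # ['frame0', 'frame1']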
@@ -146,11 +141,10 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
 
 
 def remove_silence(filepath):
-    filename = os.path.basename(filepath)
     output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
     # ignore if the file exists
     if os.path.exists(output_path) and not args.force:
-        return False
+        return
     # create all directory structure
     pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
     padding_duration_ms = 300  # default 300
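This hunk appears to drop an unused `filename` local (W0612) and make the exits consistent: pylint R1710 (inconsistent-return-statements) fires when some paths return a value and others fall off the end. Since no caller checks the result here, a bare `return` everywhere is the simplest fix. A compact sketch of the rule, with hypothetical names:

    def process(path, already_done):
        # either every exit returns a value or none does; mixing
        # `return False` with an implicit `return None` trips R1710
        if already_done:
            return
        print("processing", path)
        return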
@@ -166,7 +160,7 @@ def remove_silence(filepath):
     if num_segments != 0:
         for i, segment in reversed(list(enumerate(segments))):
             if i >= 1:
-                if flag == False:
+                if not flag:
                     concat_segment = segment
                     flag = True
                 else:
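`flag == False` works but pylint flags it as C0121 (singleton-comparison); `not flag` is the idiomatic truth test, with `flag is False` reserved for when the singleton itself matters. For example:

    flag = False
    if not flag:        # preferred truth test
        print("first segment")
    if flag is False:   # only when None and False must be distinguished
        print("strictly the False singleton")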
@@ -176,11 +170,12 @@ def remove_silence(filepath):
                 segment = segment + concat_segment
         write_wave(output_path, segment, sample_rate)
         print(output_path)
-        return True
+        return
     else:
         print("> Just Copying the file to:", output_path)
         # if fail to remove silence just write the file
         write_wave(output_path, audio, sample_rate)
+        return
 
 
 def preprocess_audios():
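Besides the same R1710 fix (`return True` becomes `return`, and an explicit trailing `return` seems to be added to the else branch), this hunk shows the segment-merging loop in full. A trace of that pattern with byte strings standing in for audio segments, illustration only:

    segments = [b"aa", b"bb", b"cc"]
    flag = False
    concat_segment = b""
    for i, segment in reversed(list(enumerate(segments))):
        if i >= 1:
            if not flag:
                concat_segment = segment
                flag = True
            else:
                concat_segment = concat_segment + segment
        else:
            segment = segment + concat_segment
    print(segment)  # b'aaccbb': segment 0 first, later segments reversed after it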
@@ -198,11 +193,9 @@ def preprocess_audios():
 
 
 if __name__ == "__main__":
-    """
-    usage
-    python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2
-    """
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
+    )
     parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
     parser.add_argument(
         "-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"
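A string literal at the top of the `if __name__ == "__main__":` block is not a docstring; pylint reports it as W0105 (pointless-string-statement). Moving the usage text into the parser's `description` makes it appear in `--help` output instead. A runnable miniature of the same pattern:

    import argparse

    parser = argparse.ArgumentParser(
        description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
    )
    parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
    args = parser.parse_args(["-i", "my_corpus/"])
    print(args.input_dir)  # my_corpus/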
@@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
     return items
 
 
-def mailabs(root_path, meta_files=None):
+def mailabs(root_path, meta_files=None, ununsed_speakers=None):
     """Normalizes M-AI-Labs meta data files to TTS format
 
     Args:
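The extra `ununsed_speakers` parameter (spelling as in the codebase) is presumably there so every dataset formatter accepts the same argument list when it is looked up by name and called generically; a formatter that does no speaker filtering simply ignores it. A hedged sketch of that calling convention, with a hypothetical second formatter and path:

    def mailabs(root_path, meta_files=None, ununsed_speakers=None):
        # the real code returns [[text, wav_path, speaker_name], ...]
        return []

    def my_dataset(root_path, meta_files=None, ununsed_speakers=None):
        return []

    # callers can now invoke any formatter with the same three arguments
    for formatter in (mailabs, my_dataset):
        items = formatter("/data/corpus", None, None)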
@@ -1,5 +1,4 @@
-import math
 import random
 from dataclasses import dataclass, field
 from itertools import chain
 from typing import Dict, List, Tuple
@@ -747,7 +746,7 @@ class Vits(BaseTTS):
 
         # inverse decoder and get the output
         z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True)
-        z_slice, slice_ids = rand_segment(z_f_pred, y_lengths, self.spec_segment_size)
+        z_slice, slice_ids = rand_segments(z_f_pred, y_lengths, self.spec_segment_size)
 
         o = self.waveform_decoder(z_slice, g=g)
 
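`rand_segment` did not exist, so the old line would have raised NameError at runtime; pylint's undefined-variable check (E0602) catches exactly this class of typo statically. For intuition, a hedged sketch of what a `rand_segments`-style helper does, slicing a fixed-size window from each batch item at a random valid offset; this is an illustration, not the TTS implementation:

    import torch

    def rand_segments(x, lengths, segment_size):
        # x: (batch, channels, frames); lengths: valid frame count per item
        batch = x.shape[0]
        max_starts = (lengths - segment_size).clamp(min=0)
        starts = (torch.rand(batch) * (max_starts + 1).float()).long()
        segments = torch.stack(
            [x[i, :, s : s + segment_size] for i, s in enumerate(starts)]
        )
        return segments, starts

    x = torch.randn(4, 80, 100)
    lengths = torch.tensor([100, 90, 80, 100])
    seg, ids = rand_segments(x, lengths, 32)
    print(seg.shape)  # torch.Size([4, 80, 32])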
@@ -7,6 +7,7 @@ import fsspec
 import numpy as np
 import torch
 from coqpit import Coqpit
+from torch.utils.data.sampler import WeightedRandomSampler
 
 from TTS.config import load_config
 from TTS.speaker_encoder.utils.generic_utils import setup_model
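The new import presumably fixes an undefined-variable error where `WeightedRandomSampler` was already referenced further down; the hunk only shows the import block. As a refresher, a minimal, hedged example of the sampler itself, not TTS code:

    from torch.utils.data.sampler import WeightedRandomSampler

    labels = [0, 0, 0, 0, 1]                       # class 1 is rare
    class_counts = {0: 4, 1: 1}
    weights = [1.0 / class_counts[label] for label in labels]
    sampler = WeightedRandomSampler(weights, num_samples=10, replacement=True)
    print(list(sampler))  # indices into labels; index 4 drawn about half the time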
@@ -180,7 +180,7 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
 
     plt.figure()
     plt.rcParams["figure.figsize"] = (50, 20)
-    barplot = sns.barplot(x, y)
+    barplot = sns.barplot(x=x, y=y)
     if save_path:
         fig = barplot.get_figure()
         fig.savefig(os.path.join(save_path, "phoneme_dist"))
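Passing `x` and `y` as keywords makes the orientation explicit, and it future-proofs the call: seaborn 0.12+ makes most plotting parameters keyword-only, so positional `sns.barplot(x, y)` eventually breaks outright. A minimal standalone version of the fixed call:

    import matplotlib.pyplot as plt
    import seaborn as sns

    x = ["a", "b", "c"]
    y = [3, 1, 2]
    ax = sns.barplot(x=x, y=y)
    ax.get_figure().savefig("phoneme_dist.png")
    plt.close()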