fix linter

pull/1032/head
WeberJulian 2021-11-02 19:10:18 +01:00 committed by Eren Gölge
parent 3e9ca4b95d
commit 215a74b32e
6 changed files with 14 additions and 22 deletions

View File

@@ -3,7 +3,6 @@ import argparse
import multiprocessing import multiprocessing
from argparse import RawTextHelpFormatter from argparse import RawTextHelpFormatter
import numpy
from tqdm.contrib.concurrent import process_map from tqdm.contrib.concurrent import process_map
from TTS.config import load_config from TTS.config import load_config

View File

@@ -6,12 +6,7 @@ import glob
import multiprocessing import multiprocessing
import os import os
import pathlib import pathlib
import sys
import wave import wave
from itertools import chain
import numpy as np
import tqdm
import webrtcvad import webrtcvad
from tqdm.contrib.concurrent import process_map from tqdm.contrib.concurrent import process_map
@@ -47,8 +42,8 @@ def write_wave(path, audio, sample_rate):
class Frame(object): class Frame(object):
"""Represents a "frame" of audio data.""" """Represents a "frame" of audio data."""
def __init__(self, bytes, timestamp, duration): def __init__(self, _bytes, timestamp, duration):
self.bytes = bytes self.bytes =_bytes
self.timestamp = timestamp self.timestamp = timestamp
self.duration = duration self.duration = duration
@@ -121,7 +116,7 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
# We want to yield all the audio we see from now until # We want to yield all the audio we see from now until
# we are NOTTRIGGERED, but we have to start with the # we are NOTTRIGGERED, but we have to start with the
# audio that's already in the ring buffer. # audio that's already in the ring buffer.
for f, s in ring_buffer: for f, _ in ring_buffer:
voiced_frames.append(f) voiced_frames.append(f)
ring_buffer.clear() ring_buffer.clear()
else: else:
@@ -146,11 +141,10 @@ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, fram
def remove_silence(filepath): def remove_silence(filepath):
filename = os.path.basename(filepath)
output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, "")) output_path = filepath.replace(os.path.join(args.input_dir, ""), os.path.join(args.output_dir, ""))
# ignore if the file exists # ignore if the file exists
if os.path.exists(output_path) and not args.force: if os.path.exists(output_path) and not args.force:
return False return
# create all directory structure # create all directory structure
pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True) pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
padding_duration_ms = 300 # default 300 padding_duration_ms = 300 # default 300
@@ -166,7 +160,7 @@ def remove_silence(filepath):
if num_segments != 0: if num_segments != 0:
for i, segment in reversed(list(enumerate(segments))): for i, segment in reversed(list(enumerate(segments))):
if i >= 1: if i >= 1:
if flag == False: if not flag:
concat_segment = segment concat_segment = segment
flag = True flag = True
else: else:
@@ -176,11 +170,12 @@ def remove_silence(filepath):
segment = segment + concat_segment segment = segment + concat_segment
write_wave(output_path, segment, sample_rate) write_wave(output_path, segment, sample_rate)
print(output_path) print(output_path)
return True return
else: else:
print("> Just Copying the file to:", output_path) print("> Just Copying the file to:", output_path)
# if fail to remove silence just write the file # if fail to remove silence just write the file
write_wave(output_path, audio, sample_rate) write_wave(output_path, audio, sample_rate)
return
def preprocess_audios(): def preprocess_audios():
@@ -198,11 +193,9 @@ def preprocess_audios():
if __name__ == "__main__": if __name__ == "__main__":
""" parser = argparse.ArgumentParser(
usage description="python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2"
python remove_silence.py -i=VCTK-Corpus-bk/ -o=../VCTK-Corpus-removed-silence -g=wav48/*/*.wav -a=2 )
"""
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir") parser.add_argument("-i", "--input_dir", type=str, default="../VCTK-Corpus", help="Dataset root dir")
parser.add_argument( parser.add_argument(
"-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir" "-o", "--output_dir", type=str, default="../VCTK-Corpus-removed-silence", help="Output Dataset dir"

View File

@@ -59,7 +59,7 @@ def mozilla_de(root_path, meta_file, **kwargs): # pylint: disable=unused-argume
return items return items
def mailabs(root_path, meta_files=None): def mailabs(root_path, meta_files=None, ununsed_speakers=None):
"""Normalizes M-AI-Labs meta data files to TTS format """Normalizes M-AI-Labs meta data files to TTS format
Args: Args:

View File

@@ -1,5 +1,4 @@
import math import math
import random
from dataclasses import dataclass, field from dataclasses import dataclass, field
from itertools import chain from itertools import chain
from typing import Dict, List, Tuple from typing import Dict, List, Tuple
@@ -747,7 +746,7 @@ class Vits(BaseTTS):
# inverse decoder and get the output # inverse decoder and get the output
z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True) z_f_pred = self.flow(z_ft, y_mask, g=g, reverse=True)
z_slice, slice_ids = rand_segment(z_f_pred, y_lengths, self.spec_segment_size) z_slice, slice_ids = rand_segments(z_f_pred, y_lengths, self.spec_segment_size)
o = self.waveform_decoder(z_slice, g=g) o = self.waveform_decoder(z_slice, g=g)

View File

@@ -7,6 +7,7 @@ import fsspec
import numpy as np import numpy as np
import torch import torch
from coqpit import Coqpit from coqpit import Coqpit
from torch.utils.data.sampler import WeightedRandomSampler
from TTS.config import load_config from TTS.config import load_config
from TTS.speaker_encoder.utils.generic_utils import setup_model from TTS.speaker_encoder.utils.generic_utils import setup_model

View File

@@ -180,7 +180,7 @@ def plot_phonemes(train_path, cmu_dict_path, save_path):
plt.figure() plt.figure()
plt.rcParams["figure.figsize"] = (50, 20) plt.rcParams["figure.figsize"] = (50, 20)
barplot = sns.barplot(x, y) barplot = sns.barplot(x=x, y=y)
if save_path: if save_path:
fig = barplot.get_figure() fig = barplot.get_figure()
fig.savefig(os.path.join(save_path, "phoneme_dist")) fig.savefig(os.path.join(save_path, "phoneme_dist"))