Change database to simple text file

pull/10/head
Matthew D. Scholefield 2018-03-16 02:06:00 -05:00
parent 09e8b91ad2
commit 0fe1bceede
3 changed files with 20 additions and 17 deletions

View File

@ -50,19 +50,18 @@ class TrainData:
@classmethod
def from_db(cls, db_file: str, db_folder: str) -> 'TrainData':
"""
Load a set of data from an SQLite database in the following format:
Column: "final_tag"
Value: "wake-word" or "not-wake-word"
Load a set of data from a text database in the following format:
<file_id> (tab) <tag>
<file_id> (tab) <tag>
Column: "data_id"
Value: identifier of file such that the following
file exists: {db_folder}/{data_id}.wav
file_id: identifier of file such that the following
file exists: {db_folder}/{data_id}.wav
tag: "wake-word" or "not-wake-word"
"""
if not db_file:
return cls(([], []), ([], []))
if not isfile(db_file):
raise RuntimeError('Database file does not exist: ' + db_file)
import dataset
train_groups = {}
train_group_file = db_file.replace('db', '') + 'groups.json'
@ -70,14 +69,19 @@ class TrainData:
with open(train_group_file) as f:
train_groups = json.load(f)
db = dataset.connect('sqlite:///' + db_file)
files = [
[join(db_folder, i['data_id'] + '.wav') for i in db['data'].find(final_tag=tag)]
for tag in ['wake-word', 'not-wake-word']
]
db_files = {
'wake-word': [],
'not-wake-word': []
}
with open(db_file) as f:
for line in f.read().split('\n'):
if not line:
continue
file, tag = line.split('\t')
db_files[tag.strip()].append(file.strip())
train_files, test_files = ([], []), ([], [])
for label, rows in enumerate(files):
for label, rows in enumerate([db_files['wake-word'], db_files['not-wake-word']]):
for fn in rows:
if not isfile(fn):
continue
@ -135,7 +139,9 @@ class TrainData:
def parse_args(parser: ArgumentParser) -> Any:
"""Return parsed args from parser, adding options for train data inputs"""
parser.add_argument('db_folder', help='Folder to load database references from')
parser.add_argument('-db', '--db-file', default='', help='Database file to use')
parser.add_argument(
'-db', '--db-file', default='', help='Text database to load from where '
'each line is <file_id>\t(wake-word|not-wake-word) and {db_folder}/<file_id>.wav exists..')
parser.add_argument('-d', '--data-dir', default='{db_folder}',
help='Load files from a different directory')
args = parser.parse_args()

View File

@ -4,7 +4,6 @@ altgraph==0.15
banal==0.3.3
bleach==1.5.0
chardet==3.0.4
dataset==1.0.5
enum-compat==0.0.2
future==0.16.0
h5py==2.7.1
@ -27,7 +26,6 @@ PyYAML==3.12
scipy==1.0.0
six==1.11.0
speechpy==2.1
SQLAlchemy==1.2.2
tensorflow==1.4.1
tensorflow-tensorboard==0.4.0
typing==3.6.4

View File

@ -48,7 +48,6 @@ setup(
'h5py',
'wavio',
'typing',
'dataset',
'prettyparse',
'precise-runner'
],