mycroft-core/mycroft/skills/skill_data.py

231 lines
6.5 KiB
Python

# Copyright 2018 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Module containing methods needed to load skill data such as intents and
regular expressions.
"""
from os import walk
from os.path import splitext, join
import re
import csv
import collections
from mycroft.messagebus.message import Message
from mycroft.util.format import expand_options
def read_vocab_file(path):
    """ Read voc file.

    Reads a .voc file, dropping comment lines (those starting with '#') and
    lines that contain only whitespace. Every remaining line is lower-cased
    and passed through expand_options, which expands the parenthesized
    alternatives on that line.

    Arguments:
        path (str): path to vocab file.

    Returns:
        List with one expand_options() result per kept line, in file order
        (a list of lists of strings).
    """
    with open(path, 'r', encoding='utf8') as voc_file:
        return [
            expand_options(entry.lower())
            for entry in voc_file
            if not entry.startswith('#') and entry.strip() != ''
        ]
def load_regex_from_file(path, skill_id):
    """Load regexes from a single .rx file.

    Each usable line is stripped, munged with the skill id (see munge_regex)
    and compiled once to validate it. The munged regex strings are returned;
    nothing is sent anywhere by this function.

    Args:
        path (str): path to regex file (*.rx). Any other extension is
            ignored and yields an empty list.
        skill_id: skill_id the regexes are tied to

    Returns:
        list of munged regex strings, in file order.

    Raises:
        re.error: if a line is not a valid regular expression.
    """
    regexes = []
    if not path.endswith('.rx'):
        return regexes

    with open(path, 'r', encoding='utf8') as reg_file:
        for line in reg_file:
            pattern = line.strip()
            # Skip comments and blank lines. A blank line would otherwise
            # compile fine and be returned as an empty regex.
            if not pattern or line.startswith("#"):
                continue
            regex = munge_regex(pattern, skill_id)
            # Raise error if regex can't be compiled
            re.compile(regex)
            regexes.append(regex)
    return regexes
def load_vocabulary(basedir, skill_id):
    """Collect the vocabulary from every .voc file below a directory.

    The directory tree is walked recursively. Each .voc file is read with
    read_vocab_file and stored under a key built from the alphanumeric form
    of the skill id followed by the file name without its extension. Files
    that yield no vocabulary are left out.

    Arguments:
        basedir (str): path of directory to load from (will recurse)
        skill_id: skill the data belongs to

    Returns:
        dict mapping the munged vocabulary name to the content returned by
        read_vocab_file for that file.
    """
    vocabs = {}
    for dirpath, _, filenames in walk(basedir):
        for filename in filenames:
            if not filename.endswith(".voc"):
                continue
            stem = splitext(filename)[0]
            key = to_alnum(skill_id) + stem
            entries = read_vocab_file(join(dirpath, filename))
            if entries:
                vocabs[key] = entries
    return vocabs
def load_regex(basedir, skill_id):
    """Collect the regexes from every .rx file below a directory.

    The directory tree is walked recursively and each .rx file is handed to
    load_regex_from_file; the per-file results are concatenated in walk
    order.

    Args:
        basedir (str): path of directory to load from
        skill_id (str): skill identifier

    Returns:
        list of regex strings as returned by load_regex_from_file.
    """
    return [
        regex
        for dirpath, _, filenames in walk(basedir)
        for filename in filenames
        if filename.endswith(".rx")
        for regex in load_regex_from_file(join(dirpath, filename), skill_id)
    ]
def to_alnum(skill_id):
    """Reduce a skill id to alphanumeric characters and underscores.

    The id is converted with str() first; every character for which
    str.isalnum() is false is replaced by "_".

    Args:
        skill_id (str): identifier to be converted
    Returns:
        (str) the converted identifier
    """
    text = str(skill_id)
    return ''.join([char if char.isalnum() else '_' for char in text])
def munge_regex(regex, skill_id):
    """Prefix every named match group with the alphanumeric skill id.

    Each occurrence of '(?P<' in the regex is followed by the result of
    to_alnum(skill_id), so a group called <Name> becomes <skillidName>.

    Args:
        regex (str): regex string
        skill_id (str): skill identifier
    Returns:
        (str) munged regex
    """
    group_open = '(?P<'
    return regex.replace(group_open, group_open + to_alnum(skill_id))
def munge_intent_parser(intent_parser, name, skill_id):
    """Rename an intent parser and its keywords to make them skill exclusive.

    The parser name is set to <skill_id>:<name>, unless name already
    contains "<skill_id>:". Keywords in requires, optional and at_least_one
    are prefixed with the alphanumeric form of the skill id (see to_alnum).
    Keywords that already contain that prefix are not prefixed again.

    The intent_parser object is modified in place; nothing is returned.

    Args:
        intent_parser: (IntentParser) object to update
        name: (str) Skill name
        skill_id: skill identifier
    """
    # Munge parser name
    name_prefix = str(skill_id) + ':'
    intent_parser.name = name if name_prefix in name else name_prefix + name

    alnum_id = to_alnum(skill_id)

    def _munge_pairs(keywords):
        """Prefix the first element of each keyword tuple when needed."""
        munged = []
        for keyword in keywords:
            if alnum_id in keyword[0]:
                # Already carries the skill id, keep untouched
                munged.append(keyword)
            else:
                tagged = alnum_id + keyword[0]
                munged.append((tagged, tagged))
        return munged

    # Munge required and optional keywords
    intent_parser.requires = _munge_pairs(intent_parser.requires)
    intent_parser.optional = _munge_pairs(intent_parser.optional)

    # Munge at_least_one keywords: strip any existing id, then prefix once
    intent_parser.at_least_one = [
        tuple(alnum_id + entity.replace(alnum_id, '') for entity in group)
        for group in intent_parser.at_least_one
    ]
def read_value_file(filename, delim):
    """Read value file.

    The value file is a simple csv structure with a key and value. Blank
    lines, lines whose first field starts with '#', and rows that do not
    have exactly two fields are ignored. If a key occurs more than once the
    last value wins.

    Arguments:
        filename (str): file to read; a falsy value yields an empty result
        delim (str): csv delimiter
    Returns:
        OrderedDict with results, in file order.
    """
    result = collections.OrderedDict()
    if not filename:
        return result

    # Read as utf8 like the other skill data files instead of relying on
    # the platform default encoding; newline='' lets the csv module do its
    # own newline handling.
    with open(filename, encoding='utf8', newline='') as f:
        for row in csv.reader(f, delimiter=delim):
            # skip blank or comment lines
            if not row or row[0].startswith("#"):
                continue
            # skip malformed rows
            if len(row) != 2:
                continue
            result[row[0]] = row[1]
    return result
def read_translated_file(filename, data):
    """Read a file inserting data.

    Double braces in the file ('{{' and '}}') are reduced to single braces
    so that both {{key}} and {key} placeholders are filled in by
    str.format using the entries of data.

    Arguments:
        filename (str): file to read
        data (dict): dictionary with data to insert into file; None is
            treated as an empty dict
    Returns:
        list of lines with trailing newlines removed, or None if no
        filename was given.
    Raises:
        KeyError: if the file has a placeholder that is missing from data.
    """
    if not filename:
        return None

    # Read as utf8 like the other skill data files instead of relying on
    # the platform default encoding.
    with open(filename, encoding='utf8') as f:
        text = f.read().replace('{{', '{').replace('}}', '}')
    return text.format(**data or {}).rstrip('\n').split('\n')