Merge pull request #145 from MycroftAI/db-bootstrap-cleanup

Moved population of the agreement table and the geography tables into the bootstrap script.
Chris Veilleux 2019-05-21 14:22:02 -05:00 committed by GitHub
commit df128c9b47
2 changed files with 207 additions and 40 deletions

View File

@@ -0,0 +1,11 @@
CREATE TABLE metrics.job (
id uuid PRIMARY KEY
DEFAULT gen_random_uuid(),
job_name text NOT NULL,
batch_date date NOT NULL,
start_ts TIMESTAMP NOT NULL,
end_ts TIMESTAMP NOT NULL,
command text NOT NULL,
success BOOLEAN NOT NULL,
UNIQUE (job_name, start_ts)
)
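
The new metrics.job table tracks one row per batch job run, keyed on job name and start time. Purely as an illustration (not part of this change), a job runner could record a run along the lines of the sketch below, which assumes an open psycopg2 connection named db and a hypothetical job called parse_core_metrics:

# Illustrative sketch only: record a batch job run in metrics.job.
# Assumes `db` is an open psycopg2 connection; `id` defaults to gen_random_uuid().
from datetime import date, datetime

job_insert = (
    "INSERT INTO metrics.job "
    "(job_name, batch_date, start_ts, end_ts, command, success) "
    "VALUES (%(job_name)s, %(batch_date)s, %(start_ts)s, %(end_ts)s, "
    "%(command)s, %(success)s)"
)
cursor = db.cursor()
cursor.execute(
    job_insert,
    dict(
        job_name='parse_core_metrics',           # hypothetical job name
        batch_date=date.today(),
        start_ts=datetime.utcnow(),
        end_ts=datetime.utcnow(),
        command='python parse_core_metrics.py',  # hypothetical command
        success=True
    )
)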

View File

@@ -1,6 +1,7 @@
from glob import glob
from os import path
from os import environ, path, remove
from markdown import markdown
from psycopg2 import connect
MYCROFT_DB_DIR = path.join(path.abspath('..'), 'mycroft')
@@ -8,10 +9,8 @@ SCHEMAS = ('account', 'skill', 'device', 'geography', 'metrics')
DB_DESTROY_FILES = (
'drop_mycroft_db.sql',
'drop_template_db.sql',
# 'drop_roles.sql'
)
DB_CREATE_FILES = (
# 'create_roles.sql',
'create_template_db.sql',
)
ACCOUNT_TABLE_ORDER = (
@@ -48,6 +47,7 @@ GEOGRAPHY_TABLE_ORDER = (
METRICS_TABLE_ORDER = (
'api',
'job'
)
schema_directory = '{}_schema'
@@ -61,32 +61,40 @@ def get_sql_from_file(file_path: str) -> str:
class PostgresDB(object):
def __init__(self, dbname, user, password=None):
self.db = connect(dbname=dbname, user=user, host='127.0.0.1')
# self.db = connect(
# dbname=dbname,
# user=user,
# password=password,
# host='selene-test-db-do-user-1412453-0.db.ondigitalocean.com',
# port=25060,
# sslmode='require'
# )
def __init__(self, db_name, user=None):
db_host = environ['DB_HOST']
db_port = environ['DB_PORT']
db_ssl_mode = environ.get('DB_SSL_MODE')
if db_name in ('postgres', 'defaultdb'):
db_user = environ['POSTGRES_DB_USER']
db_password = environ.get('POSTGRES_DB_PASSWORD')
else:
db_user = environ['MYCROFT_DB_USER']
db_password = environ['MYCROFT_DB_PASSWORD']
if user is not None:
db_user = user
self.db = connect(
dbname=db_name,
user=db_user,
password=db_password,
host=db_host,
port=db_port,
sslmode=db_ssl_mode
)
self.db.autocommit = True
def close_db(self):
self.db.close()
def execute_sql(self, sql: str):
def execute_sql(self, sql: str, args=None):
cursor = self.db.cursor()
cursor.execute(sql)
cursor.execute(sql, args)
return cursor
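
The rewritten PostgresDB pulls all connection details from the environment instead of hard-coding them (or leaving hosted-database credentials in comments). As a rough sketch, the variables it reads are listed below with placeholder values; the names come straight from this diff, while the values are invented for illustration:

# Placeholder environment for the new PostgresDB class; variable names are
# taken from this diff, values are illustrative only.
from os import environ

environ['DB_HOST'] = '127.0.0.1'
environ['DB_PORT'] = '5432'
environ['DB_SSL_MODE'] = 'prefer'             # optional; read with environ.get()
environ['POSTGRES_DB_NAME'] = 'postgres'
environ['POSTGRES_DB_USER'] = 'postgres'
environ['POSTGRES_DB_PASSWORD'] = ''          # optional; read with environ.get()
environ['MYCROFT_DB_NAME'] = 'mycroft'
environ['MYCROFT_DB_USER'] = 'selene'         # placeholder role name
environ['MYCROFT_DB_PASSWORD'] = 'change-me'  # required for non-postgres databases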
postgres_db = PostgresDB(dbname='postgres', user='postgres')
# postgres_db = PostgresDB(
# dbname='defaultdb',
# user='doadmin',
# password='l06tn0qi2bjhgcki'
# )
postgres_db = PostgresDB(db_name=environ['POSTGRES_DB_NAME'])
print('Destroying any objects we will be creating later.')
for db_destroy_file in DB_DESTROY_FILES:
@@ -94,7 +102,7 @@ for db_destroy_file in DB_DESTROY_FILES:
get_sql_from_file(db_destroy_file)
)
print('Creating the extensions, mycroft database, and selene roles')
print('Creating the mycroft database')
for db_setup_file in DB_CREATE_FILES:
postgres_db.execute_sql(
get_sql_from_file(db_setup_file)
@@ -102,13 +110,10 @@ for db_setup_file in DB_CREATE_FILES:
postgres_db.close_db()
template_db = PostgresDB(dbname='mycroft_template', user='mycroft')
# template_db = PostgresDB(
# dbname='mycroft_template',
# user='selene',
# password='ubhemhx1dikmqc5f'
# )
template_db = PostgresDB(db_name='mycroft_template')
print('Creating the extensions')
template_db.execute_sql(
get_sql_from_file(path.join('create_extensions.sql'))
)
@@ -193,22 +198,14 @@ for schema in SCHEMAS:
template_db.close_db()
print('Copying template to new database.')
postgres_db = PostgresDB(dbname='postgres', user='mycroft')
# postgres_db = PostgresDB(
# dbname='defaultdb',
# user='doadmin',
# password='l06tn0qi2bjhgcki'
# )
postgres_db = PostgresDB(db_name=environ['POSTGRES_DB_NAME'])
postgres_db.execute_sql(get_sql_from_file('create_mycroft_db.sql'))
postgres_db.close_db()
mycroft_db = PostgresDB(dbname='mycroft', user='mycroft')
# mycroft_db = PostgresDB(
# dbname='mycroft_template',
# user='selene',
# password='ubhemhx1dikmqc5f'
# )
mycroft_db = PostgresDB(db_name=environ['MYCROFT_DB_NAME'])
insert_files = [
dict(schema_dir='account_schema', file_name='membership.sql'),
dict(schema_dir='device_schema', file_name='text_to_speech.sql'),
@@ -226,3 +223,162 @@ for insert_file in insert_files:
)
except FileNotFoundError:
pass
print('Building account.agreement table')
mycroft_db.db.autocommit = False
insert_sql = (
"insert into account.agreement VALUES (default, '{}', '1', '[today,]', {})"
)
doc_dir = '/Users/chrisveilleux/Mycroft/github/documentation/_pages/'
docs = {
'Privacy Policy': doc_dir + 'embed-privacy-policy.md',
'Terms of Use': doc_dir + 'embed-terms-of-use.md'
}
try:
for agrmt_type, doc_path in docs.items():
lobj = mycroft_db.db.lobject(0, 'b')
with open(doc_path) as doc:
header_delimiter_count = 0
while True:
rec = doc.readline()
if rec == '---\n':
header_delimiter_count += 1
if header_delimiter_count == 2:
break
doc_html = markdown(
doc.read(),
output_format='html5'
)
lobj.write(doc_html)
mycroft_db.execute_sql(
insert_sql.format(agrmt_type, lobj.oid)
)
mycroft_db.execute_sql(
"grant select on large object {} to selene".format(lobj.oid)
)
mycroft_db.execute_sql(
insert_sql.format('Open Dataset', 'null')
)
except:
mycroft_db.db.rollback()
raise
else:
mycroft_db.db.commit()
mycroft_db.db.autocommit = True
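
Each agreement document is rendered to HTML and stored as a Postgres large object, with its OID recorded in account.agreement and SELECT granted to the selene role. For context only, a document written this way can be read back roughly as sketched below, assuming `db` is an open psycopg2 connection and `oid` was captured from lobj.oid during the load:

# Illustrative sketch: read back an agreement document stored as a large object.
# Assumes `db` is an open psycopg2 connection and `oid` is a known large object id.
db.autocommit = False          # large objects must be accessed inside a transaction
lobj = db.lobject(oid, 'rb')
doc_html = lobj.read()
lobj.close()
db.commit()
db.autocommit = True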
reference_file_dir = '/Users/chrisveilleux/Mycroft'
print('Building geography.country table')
country_file = 'country.txt'
country_insert = """
INSERT INTO
geography.country (iso_code, name)
VALUES
('{iso_code}', '{country_name}')
"""
with open(path.join(reference_file_dir, country_file)) as countries:
while True:
rec = countries.readline()
if rec.startswith('#ISO'):
break
for country in countries.readlines():
country_fields = country.split('\t')
insert_args = dict(
iso_code=country_fields[0],
country_name=country_fields[4]
)
mycroft_db.execute_sql(country_insert.format(**insert_args))
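
Note that the country load interpolates values into the SQL text with str.format(), which would break on a name containing a single quote; the region and timezone loads below pass args through execute_sql() instead. An equivalent parameterized form of the country insert (a sketch, not part of this change) would be:

# Parameterized variant of the country insert (illustration only).
country_insert = """
    INSERT INTO
        geography.country (iso_code, name)
    VALUES
        (%(iso_code)s, %(country_name)s)
"""
mycroft_db.execute_sql(country_insert, insert_args)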
print('Building geography.region table')
region_file = 'regions.txt'
region_insert = """
INSERT INTO
geography.region (country_id, region_code, name)
VALUES
(
(SELECT id FROM geography.country WHERE iso_code = %(iso_code)s),
%(region_code)s,
%(region_name)s)
"""
with open(path.join(reference_file_dir, region_file)) as regions:
for region in regions.readlines():
region_fields = region.split('\t')
country_iso_code = region_fields[0][:2]
insert_args = dict(
iso_code=country_iso_code,
region_code=region_fields[0],
region_name=region_fields[1]
)
mycroft_db.execute_sql(region_insert, insert_args)
print('Building geography.timezone table')
timezone_file = 'timezones.txt'
timezone_insert = """
INSERT INTO
geography.timezone (country_id, name, gmt_offset, dst_offset)
VALUES
(
(SELECT id FROM geography.country WHERE iso_code = %(iso_code)s),
%(timezone_name)s,
%(gmt_offset)s,
%(dst_offset)s
)
"""
with open(path.join(reference_file_dir, timezone_file)) as timezones:
timezones.readline()
for timezone in timezones.readlines():
timezone_fields = timezone.split('\t')
insert_args = dict(
iso_code=timezone_fields[0],
timezone_name=timezone_fields[1],
gmt_offset=timezone_fields[2],
dst_offset=timezone_fields[3]
)
mycroft_db.execute_sql(timezone_insert, insert_args)
print('Building geography.city table')
cities_file = 'cities500.txt'
region_query = "SELECT id, region_code FROM geography.region"
query_result = mycroft_db.execute_sql(region_query)
region_lookup = dict()
for row in query_result.fetchall():
region_lookup[row[1]] = row[0]
timezone_query = "SELECT id, name FROM geography.timezone"
query_result = mycroft_db.execute_sql(timezone_query)
timezone_lookup = dict()
for row in query_result.fetchall():
timezone_lookup[row[1]] = row[0]
# city_insert = """
# INSERT INTO
# geography.city (region_id, timezone_id, name, latitude, longitude)
# VALUES
# (%(region_id)s, %(timezone_id)s, %(city_name)s, %(latitude)s, %(longitude)s)
# """
with open(path.join(reference_file_dir, cities_file)) as cities:
with open(path.join(reference_file_dir, 'city.dump'), 'w') as dump_file:
for city in cities.readlines():
city_fields = city.split('\t')
city_region = city_fields[8] + '.' + city_fields[10]
region_id = region_lookup.get(city_region)
timezone_id = timezone_lookup[city_fields[17]]
if region_id is not None:
dump_file.write('\t'.join([
region_id,
timezone_id,
city_fields[1],
city_fields[4],
city_fields[5]
]) + '\n')
# mycroft_db.execute_sql(city_insert, insert_args)
with open(path.join(reference_file_dir, 'city.dump')) as dump_file:
cursor = mycroft_db.db.cursor()
cursor.copy_from(dump_file, 'geography.city', columns=(
'region_id', 'timezone_id', 'name', 'latitude', 'longitude')
)
remove(path.join(reference_file_dir, 'city.dump'))
mycroft_db.close_db()