From fe2e8bc7e799d4d11d927ec7681c0c3117357bbe Mon Sep 17 00:00:00 2001
From: Chris Veilleux
Date: Tue, 21 May 2019 14:18:36 -0500
Subject: [PATCH] moved population of account.agreement table and geography tables from python notebooks into the bootstrap script

---
 db/mycroft/metrics_schema/tables/job.sql |  11 ++
 db/scripts/bootstrap_mycroft_db.py       | 242 +++++++++++++++++++----
 2 files changed, 213 insertions(+), 40 deletions(-)
 create mode 100644 db/mycroft/metrics_schema/tables/job.sql

diff --git a/db/mycroft/metrics_schema/tables/job.sql b/db/mycroft/metrics_schema/tables/job.sql
new file mode 100644
index 00000000..8dbd8654
--- /dev/null
+++ b/db/mycroft/metrics_schema/tables/job.sql
@@ -0,0 +1,11 @@
+CREATE TABLE metrics.job (
+    id              uuid        PRIMARY KEY
+            DEFAULT gen_random_uuid(),
+    job_name        text        NOT NULL,
+    batch_date      date        NOT NULL,
+    start_ts        TIMESTAMP   NOT NULL,
+    end_ts          TIMESTAMP   NOT NULL,
+    command         text        NOT NULL,
+    success         BOOLEAN     NOT NULL,
+    UNIQUE (job_name, start_ts)
+)
diff --git a/db/scripts/bootstrap_mycroft_db.py b/db/scripts/bootstrap_mycroft_db.py
index 9136dd53..0b94f5bb 100644
--- a/db/scripts/bootstrap_mycroft_db.py
+++ b/db/scripts/bootstrap_mycroft_db.py
@@ -1,6 +1,7 @@
 from glob import glob
-from os import path
+from os import environ, path, remove
 
+from markdown import markdown
 from psycopg2 import connect
 
 MYCROFT_DB_DIR = path.join(path.abspath('..'), 'mycroft')
@@ -8,10 +9,8 @@ SCHEMAS = ('account', 'skill', 'device', 'geography', 'metrics')
 DB_DESTROY_FILES = (
     'drop_mycroft_db.sql',
     'drop_template_db.sql',
-    # 'drop_roles.sql'
 )
 DB_CREATE_FILES = (
-    # 'create_roles.sql',
     'create_template_db.sql',
 )
 ACCOUNT_TABLE_ORDER = (
@@ -48,6 +47,7 @@ GEOGRAPHY_TABLE_ORDER = (
 
 METRICS_TABLE_ORDER = (
     'api',
+    'job'
 )
 
 schema_directory = '{}_schema'
@@ -61,32 +61,40 @@ def get_sql_from_file(file_path: str) -> str:
 
 
 class PostgresDB(object):
-    def __init__(self, dbname, user, password=None):
-        self.db = connect(dbname=dbname, user=user, host='127.0.0.1')
-        # self.db = connect(
-        #     dbname=dbname,
-        #     user=user,
-        #     password=password,
-        #     host='selene-test-db-do-user-1412453-0.db.ondigitalocean.com',
-        #     port=25060,
-        #     sslmode='require'
-        # )
+    def __init__(self, db_name, user=None):
+        db_host = environ['DB_HOST']
+        db_port = environ['DB_PORT']
+        db_ssl_mode = environ.get('DB_SSL_MODE')
+        if db_name in ('postgres', 'defaultdb'):
+            db_user = environ['POSTGRES_DB_USER']
+            db_password = environ.get('POSTGRES_DB_PASSWORD')
+        else:
+            db_user = environ['MYCROFT_DB_USER']
+            db_password = environ['MYCROFT_DB_PASSWORD']
+
+        if user is not None:
+            db_user = user
+
+        self.db = connect(
+            dbname=db_name,
+            user=db_user,
+            password=db_password,
+            host=db_host,
+            port=db_port,
+            sslmode=db_ssl_mode
+        )
         self.db.autocommit = True
 
     def close_db(self):
         self.db.close()
 
-    def execute_sql(self, sql: str):
+    def execute_sql(self, sql: str, args=None):
         cursor = self.db.cursor()
-        cursor.execute(sql)
+        cursor.execute(sql, args)
+        return cursor
 
 
-postgres_db = PostgresDB(dbname='postgres', user='postgres')
-# postgres_db = PostgresDB(
-#     dbname='defaultdb',
-#     user='doadmin',
-#     password='l06tn0qi2bjhgcki'
-# )
+postgres_db = PostgresDB(db_name=environ['POSTGRES_DB_NAME'])
 
 print('Destroying any objects we will be creating later.')
 for db_destroy_file in DB_DESTROY_FILES:
@@ -94,7 +102,7 @@ for db_destroy_file in DB_DESTROY_FILES:
         get_sql_from_file(db_destroy_file)
     )
 
-print('Creating the extensions, mycroft database, and selene roles')
+print('Creating the mycroft database')
 for db_setup_file in DB_CREATE_FILES:
     postgres_db.execute_sql(
         get_sql_from_file(db_setup_file)
@@ -102,13 +110,10 @@ for db_setup_file in DB_CREATE_FILES:
 
 postgres_db.close_db()
 
-template_db = PostgresDB(dbname='mycroft_template', user='mycroft')
-# template_db = PostgresDB(
-#     dbname='mycroft_template',
-#     user='selene',
-#     password='ubhemhx1dikmqc5f'
-# )
 
+template_db = PostgresDB(db_name='mycroft_template')
+
+print('Creating the extensions')
 template_db.execute_sql(
     get_sql_from_file(path.join('create_extensions.sql'))
 )
@@ -193,22 +198,14 @@ for schema in SCHEMAS:
 
 template_db.close_db()
 
+
 print('Copying template to new database.')
-postgres_db = PostgresDB(dbname='postgres', user='mycroft')
-# postgres_db = PostgresDB(
-#     dbname='defaultdb',
-#     user='doadmin',
-#     password='l06tn0qi2bjhgcki'
-# )
+postgres_db = PostgresDB(db_name=environ['POSTGRES_DB_NAME'])
 postgres_db.execute_sql(get_sql_from_file('create_mycroft_db.sql'))
 postgres_db.close_db()
 
-mycroft_db = PostgresDB(dbname='mycroft', user='mycroft')
-# mycroft_db = PostgresDB(
-#     dbname='mycroft_template',
-#     user='selene',
-#     password='ubhemhx1dikmqc5f'
-# )
+
+mycroft_db = PostgresDB(db_name=environ['MYCROFT_DB_NAME'])
 insert_files = [
     dict(schema_dir='account_schema', file_name='membership.sql'),
     dict(schema_dir='device_schema', file_name='text_to_speech.sql'),
@@ -226,3 +223,168 @@ for insert_file in insert_files:
         )
     except FileNotFoundError:
         pass
+
+print('Building account.agreement table')
+mycroft_db.db.autocommit = False
+# Values are bound by psycopg2 rather than formatted into the SQL text.
+insert_sql = (
+    "insert into account.agreement VALUES (default, %s, '1', '[today,]', %s)"
+)
+# NOTE(review): developer-local path; the script only runs where it exists.
+doc_dir = '/Users/chrisveilleux/Mycroft/github/documentation/_pages/'
+docs = {
+    'Privacy Policy': doc_dir + 'embed-privacy-policy.md',
+    'Terms of Use': doc_dir + 'embed-terms-of-use.md'
+}
+try:
+    for agrmt_type, doc_path in docs.items():
+        lobj = mycroft_db.db.lobject(0, 'b')
+        with open(doc_path) as doc:
+            # Skip the header, which ends at the second '---' line.  The ''
+            # sentinel stops the scan at end of file instead of looping
+            # forever when a delimiter is missing.
+            header_delimiter_count = 0
+            for rec in iter(doc.readline, ''):
+                if rec == '---\n':
+                    header_delimiter_count += 1
+                if header_delimiter_count == 2:
+                    break
+            else:
+                raise ValueError(
+                    'header delimiters not found in ' + doc_path
+                )
+            doc_html = markdown(
+                doc.read(),
+                output_format='html5'
+            )
+            lobj.write(doc_html)
+        mycroft_db.execute_sql(insert_sql, (agrmt_type, lobj.oid))
+        mycroft_db.execute_sql(
+            "grant select on large object {} to selene".format(lobj.oid)
+        )
+    mycroft_db.execute_sql(insert_sql, ('Open Dataset', None))
+except:
+    mycroft_db.db.rollback()
+    raise
+else:
+    mycroft_db.db.commit()
+
+mycroft_db.db.autocommit = True
+
+# NOTE(review): developer-local path, same caveat as doc_dir.
+reference_file_dir = '/Users/chrisveilleux/Mycroft'
+
+print('Building geography.country table')
+country_file = 'country.txt'
+country_insert = """
+INSERT INTO
+    geography.country (iso_code, name)
+VALUES
+    (%(iso_code)s, %(country_name)s)
+"""
+
+with open(path.join(reference_file_dir, country_file)) as countries:
+    # Skip everything up to and including the line starting with '#ISO';
+    # the '' sentinel ends the scan at end of file if that line is absent.
+    for rec in iter(countries.readline, ''):
+        if rec.startswith('#ISO'):
+            break
+
+    for country in countries.readlines():
+        country_fields = country.split('\t')
+        insert_args = dict(
+            iso_code=country_fields[0],
+            country_name=country_fields[4]
+        )
+        mycroft_db.execute_sql(country_insert, insert_args)
+
+print('Building geography.region table')
+region_file = 'regions.txt'
+region_insert = """
+INSERT INTO
+    geography.region (country_id, region_code, name)
+VALUES
+    (
+        (SELECT id FROM geography.country WHERE iso_code = %(iso_code)s),
+        %(region_code)s,
+        %(region_name)s)
+"""
+with open(path.join(reference_file_dir, region_file)) as regions:
+    for region in regions.readlines():
+        region_fields = region.split('\t')
+        country_iso_code = region_fields[0][:2]
+        insert_args = dict(
+            iso_code=country_iso_code,
+            region_code=region_fields[0],
+            region_name=region_fields[1]
+        )
+        mycroft_db.execute_sql(region_insert, insert_args)
+
+print('Building geography.timezone table')
+timezone_file = 'timezones.txt'
+timezone_insert = """
+INSERT INTO
+    geography.timezone (country_id, name, gmt_offset, dst_offset)
+VALUES
+    (
+        (SELECT id FROM geography.country WHERE iso_code = %(iso_code)s),
+        %(timezone_name)s,
+        %(gmt_offset)s,
+        %(dst_offset)s
+    )
+"""
+with open(path.join(reference_file_dir, timezone_file)) as timezones:
+    timezones.readline()
+    for timezone in timezones.readlines():
+        timezone_fields = timezone.split('\t')
+        insert_args = dict(
+            iso_code=timezone_fields[0],
+            timezone_name=timezone_fields[1],
+            gmt_offset=timezone_fields[2],
+            dst_offset=timezone_fields[3]
+        )
+        mycroft_db.execute_sql(timezone_insert, insert_args)
+
+print('Building geography.city table')
+cities_file = 'cities500.txt'
+region_query = "SELECT id, region_code FROM geography.region"
+query_result = mycroft_db.execute_sql(region_query)
+region_lookup = dict()
+for row in query_result.fetchall():
+    region_lookup[row[1]] = row[0]
+
+timezone_query = "SELECT id, name FROM geography.timezone"
+query_result = mycroft_db.execute_sql(timezone_query)
+timezone_lookup = dict()
+for row in query_result.fetchall():
+    timezone_lookup[row[1]] = row[0]
+# city_insert = """
+# INSERT INTO
+#     geography.city (region_id, timezone_id, name, latitude, longitude)
+# VALUES
+#     (%(region_id)s, %(timezone_id)s, %(city_name)s, %(latitude)s, %(longitude)s)
+# """
+with open(path.join(reference_file_dir, cities_file)) as cities:
+    with open(path.join(reference_file_dir, 'city.dump'), 'w') as dump_file:
+        for city in cities.readlines():
+            city_fields = city.split('\t')
+            city_region = city_fields[8] + '.' + city_fields[10]
+            region_id = region_lookup.get(city_region)
+            timezone_id = timezone_lookup[city_fields[17]]
+            if region_id is not None:
+                dump_file.write('\t'.join([
+                    region_id,
+                    timezone_id,
+                    city_fields[1],
+                    city_fields[4],
+                    city_fields[5]
+                ]) + '\n')
+                # mycroft_db.execute_sql(city_insert, insert_args)
+with open(path.join(reference_file_dir, 'city.dump')) as dump_file:
+    cursor = mycroft_db.db.cursor()
+    cursor.copy_from(dump_file, 'geography.city', columns=(
+        'region_id', 'timezone_id', 'name', 'latitude', 'longitude')
+    )
+remove(path.join(reference_file_dir, 'city.dump'))
+
+mycroft_db.close_db()