selene-backend/db/scripts/neo4j-postgres.py

# Mycroft Server - Backend
# Copyright (C) 2019 Mycroft AI Inc
# SPDX-License-Identifier: 	AGPL-3.0-or-later
#
# This file is part of the Mycroft Server.
#
# The Mycroft Server is free software: you can redistribute it and/or
# modify it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import csv
import datetime
import json
import uuid
from collections import defaultdict

import time
from geopy.distance import distance
from psycopg2 import connect
from psycopg2.extras import execute_batch

users = {}
user_settings = {}
subscription = {}
devices = {}
user_devices = {}
skills = {}
skill_sections = {}
skill_fields = {}
skill_field_values = {}
device_to_skill = {}
skill_to_section = {}
section_to_field = {}
device_to_field = {}
locations = {}
timezones = {}
cities = {}
regions = {}
countries = {}
coordinates = {}
device_location = {}
hey_mycroft = str(uuid.uuid4())
christopher = str(uuid.uuid4())
ezra = str(uuid.uuid4())
jarvis = str(uuid.uuid4())

default_wake_words = {
    "hey mycroft": hey_mycroft,
    "christopher": christopher,
    "hey ezra": ezra,
    "hey jarvis": jarvis,
}


def load_csv():
    with open("users.csv") as user_csv:
        user_reader = csv.reader(user_csv)
        next(user_reader, None)
        for row in user_reader:
            # email, password
            users[row[0]] = {}
            users[row[0]]["email"] = row[1]
            users[row[0]]["password"] = row[2]
            users[row[0]]["terms"] = row[3]
            users[row[0]]["privacy"] = row[4]

    with open("user_settings.csv") as user_setting_csv:
        user_setting_reader = csv.reader(user_setting_csv)
        next(user_setting_reader, None)
        for row in user_setting_reader:
            user_settings[row[0]] = {}
            user_settings[row[0]]["date_format"] = row[1]
            user_settings[row[0]]["time_format"] = row[2]
            user_settings[row[0]]["measurement_system"] = row[3]
            user_settings[row[0]]["tts_type"] = row[4]
            user_settings[row[0]]["tts_voice"] = row[5]
            user_settings[row[0]]["wake_word"] = row[6]
            user_settings[row[0]]["sample_rate"] = row[7]
            user_settings[row[0]]["channels"] = row[8]
            user_settings[row[0]]["pronunciation"] = row[9]
            user_settings[row[0]]["threshold"] = row[10]
            user_settings[row[0]]["threshold_multiplier"] = row[11]
            user_settings[row[0]]["dynamic_energy_ratio"] = row[12]

    with open("subscription.csv") as subscription_csv:
        subscription_reader = csv.reader(subscription_csv)
        next(subscription_reader, None)
        for row in subscription_reader:
            subscription[row[0]] = {}
            subscription[row[0]]["stripe_customer_id"] = row[1]
            subscription[row[0]]["last_payment_ts"] = row[2]
            subscription[row[0]]["type"] = row[3]

    with open("devices.csv") as devices_csv:
        devices_reader = csv.reader(devices_csv)
        next(devices_reader, None)
        for row in devices_reader:
            devices[row[0]] = {}
            user_uuid = row[1]
            devices[row[0]]["user_uuid"] = row[1]
            devices[row[0]]["name"] = row[2]
            devices[row[0]]["description"] = (row[3],)
            devices[row[0]]["platform"] = (row[4],)
            devices[row[0]]["enclosure_version"] = row[5]
            devices[row[0]]["core_version"] = row[6]

            if user_uuid in user_devices:
                user_devices[user_uuid].append((row[0], row[2]))
            else:
                user_devices[user_uuid] = [(row[0], row[2])]

    with open("skill.csv") as skill_csv:
        skill_reader = csv.reader(skill_csv)
        next(skill_reader, None)
        for row in skill_reader:
            skill = row[0]
            skills[skill] = {}
            dev_uuid = row[1]
            skills[skill]["device_uuid"] = dev_uuid
            skills[skill]["name"] = row[2]
            skills[skill]["description"] = row[3]
            skills[skill]["identifier"] = row[4]
            if dev_uuid in device_to_skill:
                device_to_skill[dev_uuid].add(skill)
            else:
                device_to_skill[dev_uuid] = {skill}

    with open("skill_section.csv") as skill_section_csv:
        skill_section_reader = csv.reader(skill_section_csv)
        next(skill_section_reader, None)
        for row in skill_section_reader:
            section_uuid = row[0]
            skill_sections[section_uuid] = {}
            skill_uuid = row[1]
            skill_sections[section_uuid]["skill_uuid"] = skill_uuid
            skill_sections[section_uuid]["section"] = row[2]
            skill_sections[section_uuid]["display_order"] = row[3]
            if skill_uuid in skill_to_section:
                skill_to_section[skill_uuid].add(section_uuid)
            else:
                skill_to_section[skill_uuid] = {section_uuid}

    with open("skill_fields.csv") as skill_fields_csv:
        skill_fields_reader = csv.reader(skill_fields_csv)
        next(skill_fields_reader, None)
        for row in skill_fields_reader:
            field_uuid = row[0]
            skill_fields[field_uuid] = {}
            section_uuid = row[1]
            # skill_fields[field_uuid]['section_uuid'] = section_uuid
            skill_fields[field_uuid]["name"] = row[2]
            skill_fields[field_uuid]["type"] = row[3]
            skill_fields[field_uuid]["label"] = row[4]
            skill_fields[field_uuid]["hint"] = row[5]
            skill_fields[field_uuid]["placeholder"] = row[6]
            skill_fields[field_uuid]["hide"] = row[7]
            skill_fields[field_uuid]["options"] = row[8]
            # skill_fields[field_uuid]['order'] = row[9]
            if section_uuid in section_to_field:
                section_to_field[section_uuid].add(field_uuid)
            else:
                section_to_field[section_uuid] = {field_uuid}

    with open("skill_fields_values.csv") as skill_field_values_csv:
        skill_field_values_reader = csv.reader(skill_field_values_csv)
        next(skill_field_values_reader, None)
        for row in skill_field_values_reader:
            field_uuid = row[0]
            skill_field_values[field_uuid] = {}
            skill_field_values[field_uuid]["skill_uuid"] = row[1]
            device_uuid = row[2]
            skill_field_values[field_uuid]["device_uuid"] = device_uuid
            skill_field_values[field_uuid]["field_value"] = row[3]
            if device_uuid in device_to_field:
                device_to_field[device_uuid].add(field_uuid)
            else:
                device_to_field[device_uuid] = {field_uuid}

    with open("location.csv") as location_csv:
        location_reader = csv.reader(location_csv)
        next(location_reader, None)
        for row in location_reader:
            location_uuid = row[0]
            locations[location_uuid] = {}
            locations[location_uuid]["timezone"] = row[1]
            locations[location_uuid]["city"] = row[2]
            locations[location_uuid]["coordinate"] = row[3]

    with open("timezone.csv") as timezone_csv:
        timezone_reader = csv.reader(timezone_csv)
        next(timezone_reader, None)
        for row in timezone_reader:
            timezone_uuid = row[0]
            timezones[timezone_uuid] = {}
            timezones[timezone_uuid]["code"] = row[1]
            timezones[timezone_uuid]["name"] = row[2]

    with open("city.csv") as city_csv:
        city_reader = csv.reader(city_csv)
        next(city_reader, None)
        for row in city_reader:
            city_uuid = row[0]
            cities[city_uuid] = {}
            cities[city_uuid]["region"] = row[1]
            cities[city_uuid]["name"] = row[2]

    with open("region.csv") as region_csv:
        region_reader = csv.reader(region_csv)
        next(region_reader, None)
        for row in region_reader:
            region_uuid = row[0]
            regions[region_uuid] = {}
            regions[region_uuid]["country"] = row[1]
            regions[region_uuid]["name"] = row[2]
            regions[region_uuid]["code"] = row[3]

    with open("country.csv") as country_csv:
        country_reader = csv.reader(country_csv)
        next(country_reader, None)
        for row in country_reader:
            country_uuid = row[0]
            countries[country_uuid] = {}
            countries[country_uuid]["name"] = row[1]
            countries[country_uuid]["code"] = row[2]

    with open("coordinate.csv") as coordinate_csv:
        coordinate_reader = csv.reader(coordinate_csv)
        next(coordinate_reader, None)
        for row in coordinate_reader:
            coordinate_uuid = row[0]
            coordinates[coordinate_uuid] = {}
            coordinates[coordinate_uuid]["latitude"] = row[1]
            coordinates[coordinate_uuid]["longitude"] = row[2]

    with open("device_location.csv") as device_location_csv:
        device_location_reader = csv.reader(device_location_csv, None)
        next(device_location_reader, None)
        for row in device_location_reader:
            device_uuid = row[0]
            if device_uuid in devices:
                devices[device_uuid]["location"] = row[1]


def format_date(value):
    value = int(value)
    value = datetime.datetime.fromtimestamp(value // 1000)
    return f"{value:%Y-%m-%d}"


def format_timestamp(value):
    value = int(value)
    value = datetime.datetime.fromtimestamp(value // 1000)
    return f"{value:%Y-%m-%d %H:%M:%S}"


db = connect(dbname="mycroft", user="postgres", host="127.0.0.1")


db.autocommit = True

subscription_uuids = {}


def get_subscription_uuid(subs):
    if subs in subscription_uuids:
        return subscription_uuids[subs]
    else:
        cursor = db.cursor()
        cursor.execute(
            f"select id from account.membership s where s.rate_period = '{subs}'"
        )
        result = cursor.fetchone()
        subscription_uuids[subs] = result
        return result


tts_uuids = {}


def get_tts_uuid(tts):
    if tts in tts_uuids:
        return tts_uuids[tts]
    else:
        cursor = db.cursor()
        cursor.execute(
            f"select id from device.text_to_speech s where s.setting_name = '{tts}'"
        )
        result = cursor.fetchone()
        tts_uuids[tts] = result
        return result


def fill_account_table():
    query = (
        "insert into account.account("
        "id, "
        "email_address, "
        "password) "
        "values (%s, %s, %s)"
    )
    with db.cursor() as cur:
        accounts = (
            (uuid, account["email"], account["password"])
            for uuid, account in users.items()
        )
        execute_batch(cur, query, accounts, page_size=1000)


def fill_account_agreement_table():
    query = (
        "insert into account.account_agreement(account_id, agreement_id, accept_date)"
        "values (%s, (select id from account.agreement where agreement = %s), %s)"
    )
    with db.cursor() as cur:
        terms = (
            (uuid, "Terms of Use", format_timestamp(account["terms"]))
            for uuid, account in users.items()
            if account["terms"] != ""
        )
        privacy = (
            (uuid, "Privacy Policy", format_timestamp(account["privacy"]))
            for uuid, account in users.items()
            if account["privacy"] != ""
        )
        execute_batch(cur, query, terms, page_size=1000)
        execute_batch(cur, query, privacy, page_size=1000)


def fill_default_wake_word():
    query1 = (
        "insert into device.wake_word ("
        "id,"
        "setting_name,"
        "display_name,"
        "engine)"
        "values (%s, %s, %s, %s)"
    )
    query2 = (
        "insert into device.wake_word_settings("
        "wake_word_id,"
        "sample_rate,"
        "channels,"
        "pronunciation,"
        "threshold,"
        "threshold_multiplier,"
        "dynamic_energy_ratio)"
        "values (%s, %s, %s, %s, %s, %s, %s)"
    )
    wake_words = [
        (hey_mycroft, "Hey Mycroft", "Hey Mycroft", "precise"),
        (christopher, "Christopher", "Christopher", "precise"),
        (ezra, "Hey Ezra", "Hey Ezra", "precise"),
        (jarvis, "Hey Jarvis", "Hey Jarvis", "precise"),
    ]
    wake_word_settings = [
        (hey_mycroft, "16000", "1", "HH EY . M AY K R AO F T", "1e-90", "1", "1.5"),
        (christopher, "16000", "1", "K R IH S T AH F ER .", "1e-25", "1", "1.5"),
        (ezra, "16000", "1", "HH EY . EH Z R AH", "1e-10", "1", "2.5"),
        (jarvis, "16000", "1", "HH EY . JH AA R V AH S", "1e-25", "1", "1.5"),
    ]
    with db.cursor() as cur:
        execute_batch(cur, query1, wake_words)
        execute_batch(cur, query2, wake_word_settings)


def fill_wake_word_table():
    query = (
        "insert into device.wake_word ("
        "id,"
        "setting_name,"
        "display_name,"
        "engine,"
        "account_id)"
        "values (%s, %s, %s, %s, %s)"
    )

    def map_wake_word(user_id):
        wake_word_id = str(uuid.uuid4())
        wake_word = (
            user_settings[user_id]["wake_word"].lower()
            if user_id in user_settings
            else "hey mycroft"
        )
        mycroft_wake_word = default_wake_words.get(wake_word)
        if mycroft_wake_word is not None:
            wake_word_id = mycroft_wake_word
        users[user_id]["wake_word_id"] = wake_word_id
        return wake_word_id, wake_word, wake_word, "precise", user_id

    with db.cursor() as cur:
        wake_words = (map_wake_word(account_id) for account_id in users)
        wake_words = (
            wk
            for wk in wake_words
            if wk[0] not in (hey_mycroft, christopher, ezra, jarvis)
        )
        execute_batch(cur, query, wake_words, page_size=1000)


def fill_account_preferences_table():
    query = (
        "insert into device.account_preferences("
        "account_id, "
        "date_format, "
        "time_format, "
        "measurement_system)"
        "values (%s, %s, %s, %s)"
    )

    def map_account_preferences(user_uuid):
        if user_uuid in user_settings:
            user_setting = user_settings[user_uuid]
            date_format = user_setting["date_format"]
            if date_format == "DMY":
                date_format = "DD/MM/YYYY"
            else:
                date_format = "MM/DD/YYYY"
            time_format = user_setting["time_format"]
            if time_format == "full":
                time_format = "24 Hour"
            else:
                time_format = "12 Hour"
            measurement_system = user_setting["measurement_system"]
            if measurement_system == "metric":
                measurement_system = "Metric"
            elif measurement_system == "imperial":
                measurement_system = "Imperial"
            tts_type = user_setting["tts_type"]
            tts_voice = user_setting["tts_voice"]
            if tts_type == "MimicSetting":
                if tts_voice == "ap":
                    tts = "ap"
                elif tts_voice == "trinity":
                    tts = "amy"
                else:
                    tts = "ap"
            elif tts_type == "Mimic2Setting":
                tts = "kusal"
            elif tts_type == "GoogleTTSSetting":
                tts = "google"
            else:
                tts = "ap"
            text_to_speech_id = get_tts_uuid(tts)
            users[user_uuid]["text_to_speech_id"] = text_to_speech_id
            return user_uuid, date_format, time_format, measurement_system
        else:
            text_to_speech_id = get_tts_uuid("ap")
            users[user_uuid]["text_to_speech_id"] = text_to_speech_id
            return user_uuid, "MM/DD/YYYY", "12 Hour", "Imperial"

    with db.cursor() as cur:
        account_preferences = (
            map_account_preferences(user_uuid) for user_uuid in users
        )
        execute_batch(cur, query, account_preferences, page_size=1000)


def fill_subscription_table():
    query = (
        "insert into account.account_membership("
        "account_id, "
        "membership_id, "
        "membership_ts_range, "
        "payment_account_id,"
        "payment_method,"
        "payment_id) "
        "values (%s, %s, %s, %s, %s, %s)"
    )

    def map_subscription(user_uuid):
        subscr = subscription[user_uuid]
        stripe_customer_id = subscr["stripe_customer_id"]
        start = format_timestamp(subscr["last_payment_ts"])
        subscription_ts_range = "[{},)".format(start)
        subscription_type = subscr["type"]
        if subscription_type == "MonthlyAccount":
            subscription_type = "month"
        elif subscription_type == "YearlyAccount":
            subscription_type = "year"
        subscription_uuid = get_subscription_uuid(subscription_type)
        return (
            user_uuid,
            subscription_uuid,
            subscription_ts_range,
            stripe_customer_id,
            "Stripe",
            "subscription_id",
        )

    with db.cursor() as cur:
        account_subscriptions = (
            map_subscription(user_uuid) for user_uuid in subscription
        )
        execute_batch(cur, query, account_subscriptions, page_size=1000)


def fill_wake_word_settings_table():
    query = (
        "insert into device.wake_word_settings("
        "wake_word_id,"
        "sample_rate,"
        "channels,"
        "pronunciation,"
        "threshold,"
        "threshold_multiplier,"
        "dynamic_energy_ratio)"
        "values (%s, %s, %s, %s, %s, %s, %s)"
    )

    def map_wake_word_settings(user_uuid):
        user_setting = user_settings[user_uuid]
        wake_word_id = users[user_uuid]["wake_word_id"]
        sample_rate = user_setting["sample_rate"]
        channels = user_setting["channels"]
        pronunciation = user_setting["pronunciation"]
        threshold = user_setting["threshold"]
        threshold_multiplier = user_setting["threshold_multiplier"]
        dynamic_energy_ratio = user_setting["dynamic_energy_ratio"]
        return (
            wake_word_id,
            sample_rate,
            channels,
            pronunciation,
            threshold,
            threshold_multiplier,
            dynamic_energy_ratio,
        )

    with db.cursor() as cur:
        account_wake_word_settings = (
            map_wake_word_settings(user_uuid)
            for user_uuid in users
            if user_uuid in user_settings
        )
        account_wake_word_settings = (
            wks
            for wks in account_wake_word_settings
            if wks[0] not in (hey_mycroft, christopher, ezra, jarvis)
        )
        execute_batch(cur, query, account_wake_word_settings, page_size=1000)


def change_device_name():
    for user in user_devices:
        if user in users:
            device_names = defaultdict(list)
            for device_uuid, name in user_devices[user]:
                device_names[name].append(device_uuid)
            for name in device_names:
                uuids = device_names[name]
                if len(uuids) > 1:
                    count = 1
                    for uuid in uuids:
                        devices[uuid]["name"] = "{name}-{uuid}".format(
                            name=name, uuid=uuid
                        )
                        count += 1


def fill_device_table():
    query = (
        "insert into device.device("
        "id, "
        "account_id, "
        "name, "
        "placement,"
        "platform,"
        "enclosure_version,"
        "core_version,"
        "wake_word_id,"
        "geography_id,"
        "text_to_speech_id) "
        "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    query2 = (
        "insert into device.geography("
        "id,"
        "account_id,"
        "country_id,"
        "region_id,"
        "city_id,"
        "timezone_id) "
        "values (%s, %s, %s, %s, %s, %s)"
    )

    with db.cursor() as cur:
        query_geography = """
        SELECT
            city.id, region.id, country.id, timezone.id
        FROM
            geography.city city
        INNER JOIN
            geography.region region ON city.region_id = region.id
        INNER JOIN
            geography.country country ON region.country_id = country.id
        INNER JOIN
            geography.timezone timezone ON country.id = timezone.country_id
        WHERE
            city.name = %s and region.name = %s and timezone.name = %s;
        """
        cur.execute(query_geography, ("Lawrence", "Kansas", "America/Chicago"))
        city_default, region_default, country_default, timezone_default = cur.fetchone()

    def map_geography(account_id, device_id):
        geography_id = str(uuid.uuid4())
        with db.cursor() as cur:
            query = """
            SELECT
                city.id, region.id, country.id, timezone.id
            FROM
                geography.city city
            INNER JOIN
                geography.region region ON city.region_id = region.id
            INNER JOIN
                geography.country country ON region.country_id = country.id
            INNER JOIN
                geography.timezone timezone ON country.id = timezone.country_id
            WHERE
                city.name = %s and region.name = %s and timezone.name = %s and country.name = %s;
            """
            location_uuid = devices[device_id].get("location")
            if location_uuid is not None:
                location = locations[location_uuid]
                timezone_entity = timezones[location["timezone"]]
                timezone = timezone_entity["code"]
                city_entity = cities[location["city"]]
                city = city_entity["name"]
                region_entity = regions[city_entity["region"]]
                region = region_entity["name"]
                country_entity = countries[region_entity["country"]]
                country = country_entity["name"]
                cur.execute(query, (city, region, timezone, country))
                result = cur.fetchone()
                if result is not None:
                    city, region, country, timezone = result
                    return geography_id, account_id, country, region, city, timezone
        return (
            geography_id,
            account_id,
            country_default,
            region_default,
            city_default,
            timezone_default,
        )

    def map_device(device_id):
        device = devices[device_id]
        account_id = device["user_uuid"]
        name = device["name"]
        placement = device["description"]
        platform = device["platform"]
        enclosure_version = device["enclosure_version"]
        core_version = device["core_version"]
        wake_word_id = users[account_id]["wake_word_id"]
        geography_id = device["geography_id"]

        user_setting = user_settings[account_id]
        tts_type = user_setting["tts_type"]
        tts_voice = user_setting["tts_voice"]
        if tts_type == "MimicSetting":
            if tts_voice == "ap":
                tts = "ap"
            elif tts_voice == "trinity":
                tts = "amy"
            else:
                tts = "ap"
        elif tts_type == "Mimic2Setting":
            tts = "kusal"
        elif tts_type == "GoogleTTSSetting":
            tts = "google"
        else:
            tts = "ap"
        text_to_speech_id = get_tts_uuid(tts)

        return (
            device_id,
            account_id,
            name,
            placement,
            platform,
            enclosure_version,
            core_version,
            wake_word_id,
            geography_id,
            text_to_speech_id,
        )

    with db.cursor() as cur:
        geography_batch = []
        for user in user_devices:
            if user in users and user in user_settings:
                aux = user_devices[user]
                device_id, _ = aux[0]
                geography = map_geography(user, device_id)
                geography_batch.append(geography)
                for device_id, name in aux:
                    devices[device_id]["geography_id"] = geography[0]
        execute_batch(cur, query2, geography_batch, page_size=1000)
        devices_batch = (
            map_device(device_id)
            for user in user_devices
            if user in users and user in user_settings
            for device_id, name in user_devices[user]
        )
        execute_batch(cur, query, devices_batch, page_size=1000)


def fill_skills_table():
    skills_batch = []
    settings_display_batch = []
    device_skill_batch = []
    for user in user_devices:
        if user in users and user in user_settings:
            for device_uuid, name in user_devices[user]:
                if device_uuid in device_to_skill:
                    for skill_uuid in device_to_skill[device_uuid]:
                        skill = skills[skill_uuid]
                        skill_name = skill["name"]
                        identifier = skill["identifier"]
                        sections = []
                        settings = {}
                        if skill_uuid in skill_to_section:
                            for section_uuid in skill_to_section[skill_uuid]:
                                section = skill_sections[section_uuid]
                                section_name = section["section"]
                                fields = []
                                if section_uuid in section_to_field:
                                    for field_uuid in section_to_field[section_uuid]:
                                        fields.append(skill_fields[field_uuid])
                                        if field_uuid in skill_field_values:
                                            settings[
                                                skill_fields[field_uuid]["name"]
                                            ] = skill_field_values[field_uuid][
                                                "field_value"
                                            ]
                                sections.append(
                                    {"name": section_name, "fields": fields}
                                )
                        skill_setting_display = {
                            "name": skill_name,
                            "identifier": identifier,
                            "skillMetadata": {"sections": sections},
                        }
                        skills_batch.append((skill_uuid, skill_name))
                        meta_id = str(uuid.uuid4())
                        settings_display_batch.append(
                            (meta_id, skill_uuid, json.dumps(skill_setting_display))
                        )
                        device_skill_batch.append(
                            (device_uuid, skill_uuid, meta_id, json.dumps(settings))
                        )

    with db.cursor() as curr:
        query = "insert into skill.skill(id, name) values (%s, %s)"
        execute_batch(curr, query, skills_batch, page_size=1000)
        query = "insert into skill.settings_display(id, skill_id, settings_display) values (%s, %s, %s)"
        execute_batch(curr, query, settings_display_batch, page_size=1000)
        query = (
            "insert into device.device_skill(device_id, skill_id, skill_settings_display_id, settings) "
            "values (%s, %s, %s, %s)"
        )
        execute_batch(curr, query, device_skill_batch, page_size=1000)


def analyze_locations():
    matches = 0
    mismatches = 0
    g_mismatches = defaultdict(lambda: defaultdict(list))
    for city in cities.values():
        region = regions[city["region"]]
        country = countries[region["country"]]
        city_name = city["name"]
        region_name = region["name"]
        country_name = country["name"]
        remove = [
            "District",
            "Region",
            "Development",
            "Prefecture",
            "Community",
            "County",
            "Province",
            "Division",
            "Voivodeship",
            "State",
            "of",
            "Governorate",
        ]
        with db.cursor() as curr:
            original_region_name = region_name
            region_name = " ".join(i for i in region_name.split() if i not in remove)
            query = (
                "select city.name "
                "from geography.city city "
                "inner join geography.region region on city.region_id = region.id "
                "inner join geography.country country on region.country_id = country.id "
                "where "
                "city.name = '{}' and "
                "(region.name = '{}' or region.name = '{}') and "
                "country.name = '{}'".format(
                    city_name.replace("'", "''"),
                    original_region_name.replace("'", "''"),
                    region_name.replace("'", "''"),
                    country_name.replace("'", "''"),
                )
            )
            curr.execute(query)
            result = curr.fetchone()
            if result is None:
                mismatches += 1
                g_mismatches[country_name][region_name].append(city_name)
            else:
                matches += 1

    for country2, regions2 in g_mismatches.items():
        for region2, cities2 in regions2.items():
            for city2 in cities2:
                print("{} - {} - {}".format(country2, region2, city2))

    print("Number os mismatches: {}".format(mismatches))


def analyze_location_2():
    aux = defaultdict(lambda: defaultdict(lambda: defaultdict(str)))

    locations_from_db = defaultdict(list)
    with db.cursor() as cur:
        cur.execute(
            "select "
            "c1.id, "
            "c1.name, "
            "c1.latitude, "
            "c1.longitude, "
            "r.name, "
            "c2.name, "
            "c2.iso_code "
            "from geography.city c1 "
            "inner join geography.region r on c1.region_id = r.id "
            "inner join geography.country c2 on r.country_id = c2.id"
        )
        for c1_id, c1, latitude, longitude, r, c2_name, c2_code in cur:
            aux[c2_name][r][c1] = c1_id
            locations_from_db[c2_code].append((c1, latitude, longitude))

    for location_uuid, location in locations.items():
        coordinate = coordinates[location_uuid]
        city = cities[location["city"]]
        city_name = city["name"]
        region = regions[city["region"]]
        region_name = region["name"]
        country = countries[region["country"]]
        country_code = country["code"]
        country_name = country["name"]

        res = aux.get(country_name)
        if res is not None:
            res = res.get(region_name)
            if res is not None:
                res = res.get(city_name)
                if res is not None:
                    print("Match: {}".format(city_name))
                    continue
        min_dist = None
        result_name = None
        for c1_name, latitude, longitude in locations_from_db[country_code]:
            point1 = (float(latitude), float(longitude))
            point2 = (float(coordinate["latitude"]), float(coordinate["longitude"]))
            dist = distance(point1, point2).km
            if min_dist is None or dist < min_dist:
                min_dist = dist
                result_name = c1_name
        print("Actual: {}, calculated: {}".format(city_name, result_name))


start = time.time()
load_csv()
end = time.time()

print("Time to load CSVs {}".format(end - start))

start = time.time()
print("Importing account table")
# fill_account_table()
print("Importing agreements table")
# fill_account_agreement_table()
print("Importing account preferences table")
# fill_account_preferences_table()
print("Importing subscription table")
# fill_subscription_table()
print("Importing wake word table")
# fill_default_wake_word()
# fill_wake_word_table()
print("Importing wake word settings table")
# fill_wake_word_settings_table()
print("Importing device table")
# change_device_name()
# fill_device_table()
print("Importing skills table")
# fill_skills_table()
analyze_location_2()
end = time.time()
print("Time to import: {}".format(end - start))