mirror of https://github.com/MycroftAI/mimic2.git
Remove dependency on inflect
parent
92b4379165
commit
e35859d0b0
|
@ -1,7 +1,6 @@
|
|||
# Note: this doesn't include tensorflow or tensorflow-gpu because the package you need to install
|
||||
# depends on your platform. It is assumed you have already installed tensorflow.
|
||||
falcon==1.2.0
|
||||
inflect==0.2.5
|
||||
librosa==0.5.1
|
||||
matplotlib==2.0.2
|
||||
numpy==1.13.0
|
||||
|
|
|
@ -2,40 +2,39 @@ from text.numbers import normalize_numbers
|
|||
|
||||
|
||||
def test_normalize_numbers():
|
||||
assert normalize_numbers('0') == 'zero'
|
||||
assert normalize_numbers('1') == 'one'
|
||||
assert normalize_numbers('15') == 'fifteen'
|
||||
assert normalize_numbers('24') == 'twenty-four'
|
||||
assert normalize_numbers('24') == 'twenty four'
|
||||
assert normalize_numbers('100') == 'one hundred'
|
||||
assert normalize_numbers('101') == 'one hundred one'
|
||||
assert normalize_numbers('456') == 'four hundred fifty-six'
|
||||
assert normalize_numbers('456') == 'four hundred fifty six'
|
||||
assert normalize_numbers('1000') == 'one thousand'
|
||||
assert normalize_numbers('1800') == 'eighteen hundred'
|
||||
assert normalize_numbers('2,000') == 'two thousand'
|
||||
assert normalize_numbers('3000') == 'three thousand'
|
||||
assert normalize_numbers('18000') == 'eighteen thousand'
|
||||
assert normalize_numbers('24,000') == 'twenty-four thousand'
|
||||
assert normalize_numbers('124,001') == 'one hundred twenty-four thousand one'
|
||||
assert normalize_numbers('24,000') == 'twenty four thousand'
|
||||
assert normalize_numbers('124,001') == 'one hundred twenty four thousand one'
|
||||
assert normalize_numbers('999,999') == 'nine hundred ninety nine thousand nine hundred ninety nine'
|
||||
assert normalize_numbers('1000000002') == 'one billion two'
|
||||
assert normalize_numbers('1200000000') == 'one billion two hundred million'
|
||||
assert normalize_numbers('19800000004001') == 'nineteen trillion eight hundred billion four thousand one'
|
||||
assert normalize_numbers('712000000000000000') == 'seven hundred twelve quadrillion'
|
||||
assert normalize_numbers('1000000000000000000') == '1000000000000000000'
|
||||
assert normalize_numbers('6.4 sec') == 'six point four sec'
|
||||
|
||||
|
||||
def test_normalize_ordinals():
|
||||
assert normalize_numbers('1st') == 'first'
|
||||
assert normalize_numbers('2nd') == 'second'
|
||||
assert normalize_numbers('5th') == 'fifth'
|
||||
assert normalize_numbers('9th') == 'ninth'
|
||||
assert normalize_numbers('243rd place') == 'two hundred and forty-third place'
|
||||
|
||||
|
||||
def test_normalize_dates():
|
||||
assert normalize_numbers('1400') == 'fourteen hundred'
|
||||
assert normalize_numbers('1901') == 'nineteen oh one'
|
||||
assert normalize_numbers('1999') == 'nineteen ninety-nine'
|
||||
assert normalize_numbers('2000') == 'two thousand'
|
||||
assert normalize_numbers('2004') == 'two thousand four'
|
||||
assert normalize_numbers('2010') == 'twenty ten'
|
||||
assert normalize_numbers('2012') == 'twenty twelve'
|
||||
assert normalize_numbers('2025') == 'twenty twenty-five'
|
||||
assert normalize_numbers('September 11, 2001') == 'September eleven, two thousand one'
|
||||
assert normalize_numbers('July 26, 1984.') == 'July twenty-six, nineteen eighty-four.'
|
||||
assert normalize_numbers('15th') == 'fifteenth'
|
||||
assert normalize_numbers('212th street') == 'two hundred twelfth street'
|
||||
assert normalize_numbers('243rd place') == 'two hundred forty third place'
|
||||
assert normalize_numbers('1025th') == 'one thousand twenty fifth'
|
||||
assert normalize_numbers('1000000th') == 'one millionth'
|
||||
|
||||
|
||||
def test_normalize_money():
|
||||
|
@ -43,9 +42,9 @@ def test_normalize_money():
|
|||
assert normalize_numbers('$1') == 'one dollar'
|
||||
assert normalize_numbers('$10') == 'ten dollars'
|
||||
assert normalize_numbers('$.01') == 'one cent'
|
||||
assert normalize_numbers('$0.25') == 'twenty-five cents'
|
||||
assert normalize_numbers('$0.25') == 'twenty five cents'
|
||||
assert normalize_numbers('$5.00') == 'five dollars'
|
||||
assert normalize_numbers('$5.01') == 'five dollars, one cent'
|
||||
assert normalize_numbers('$135.99.') == 'one hundred thirty-five dollars, ninety-nine cents.'
|
||||
assert normalize_numbers('$135.99.') == 'one hundred thirty five dollars, ninety nine cents.'
|
||||
assert normalize_numbers('$40,000') == 'forty thousand dollars'
|
||||
assert normalize_numbers('for £2500!') == 'for twenty-five hundred pounds!'
|
||||
assert normalize_numbers('for £2500!') == 'for twenty five hundred pounds!'
|
||||
|
|
|
@ -31,13 +31,6 @@ def test_collapse_whitespace():
|
|||
assert cleaners.collapse_whitespace(' x. y, \tz') == ' x. y, z'
|
||||
|
||||
|
||||
def test_convert_to_ascii():
|
||||
assert cleaners.convert_to_ascii("raison d'être") == "raison d'etre"
|
||||
assert cleaners.convert_to_ascii('grüß gott') == 'gruss gott'
|
||||
assert cleaners.convert_to_ascii('안녕') == 'annyeong'
|
||||
assert cleaners.convert_to_ascii('Здравствуйте') == 'Zdravstvuite'
|
||||
|
||||
|
||||
def test_lowercase():
|
||||
assert cleaners.lowercase('Happy Birthday!') == 'happy birthday!'
|
||||
assert cleaners.lowercase('CAFÉ') == 'café'
|
||||
|
@ -48,13 +41,13 @@ def test_expand_abbreviations():
|
|||
|
||||
|
||||
def test_expand_numbers():
|
||||
assert cleaners.expand_numbers('3 apples and 44 pears') == 'three apples and forty-four pears'
|
||||
assert cleaners.expand_numbers('3 apples and 44 pears') == 'three apples and forty four pears'
|
||||
assert cleaners.expand_numbers('$3.50 for gas.') == 'three dollars, fifty cents for gas.'
|
||||
|
||||
|
||||
def test_cleaner_pipelines():
|
||||
text = 'Mr. Müller ate 2 Apples'
|
||||
assert cleaners.english_cleaners(text) == 'mister muller ate two apples'
|
||||
assert cleaners.transliteration_cleaners(text) == 'mr. muller ate 2 apples'
|
||||
assert cleaners.english_cleaners(text) == 'mister mller ate two apples'
|
||||
assert cleaners.transliteration_cleaners(text) == 'mr. mller ate 2 apples'
|
||||
assert cleaners.basic_cleaners(text) == 'mr. müller ate 2 apples'
|
||||
|
||||
|
|
111
text/numbers.py
111
text/numbers.py
|
@ -1,15 +1,68 @@
|
|||
import inflect
|
||||
import re
|
||||
|
||||
|
||||
_inflect = inflect.engine()
|
||||
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
|
||||
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
|
||||
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
|
||||
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
|
||||
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
|
||||
_ordinal_re = re.compile(r'([0-9]+)(st|nd|rd|th)')
|
||||
_number_re = re.compile(r'[0-9]+')
|
||||
|
||||
_units = [
|
||||
'',
|
||||
'one',
|
||||
'two',
|
||||
'three',
|
||||
'four',
|
||||
'five',
|
||||
'six',
|
||||
'seven',
|
||||
'eight',
|
||||
'nine',
|
||||
'ten',
|
||||
'eleven',
|
||||
'twelve',
|
||||
'thirteen',
|
||||
'fourteen',
|
||||
'fifteen',
|
||||
'sixteen',
|
||||
'seventeen',
|
||||
'eighteen',
|
||||
'nineteen'
|
||||
]
|
||||
|
||||
_tens = [
|
||||
'',
|
||||
'ten',
|
||||
'twenty',
|
||||
'thirty',
|
||||
'forty',
|
||||
'fifty',
|
||||
'sixty',
|
||||
'seventy',
|
||||
'eighty',
|
||||
'ninety',
|
||||
]
|
||||
|
||||
_digit_groups = [
|
||||
'',
|
||||
'thousand',
|
||||
'million',
|
||||
'billion',
|
||||
'trillion',
|
||||
'quadrillion',
|
||||
]
|
||||
|
||||
_ordinal_suffixes = [
|
||||
('one', 'first'),
|
||||
('two', 'second'),
|
||||
('three', 'third'),
|
||||
('five', 'fifth'),
|
||||
('eight', 'eighth'),
|
||||
('nine', 'ninth'),
|
||||
('twelve', 'twelfth'),
|
||||
('ty', 'tieth'),
|
||||
]
|
||||
|
||||
def _remove_commas(m):
|
||||
return m.group(1).replace(',', '')
|
||||
|
@ -40,23 +93,47 @@ def _expand_dollars(m):
|
|||
return 'zero dollars'
|
||||
|
||||
|
||||
def _expand_ordinal(m):
|
||||
return _inflect.number_to_words(m.group(0))
|
||||
def _standard_number_to_words(n, digit_group):
    """Spell out ``n`` in words, working through it three digits at a time.

    Args:
        n: Integer to convert.
        digit_group: Index into ``_digit_groups`` naming the scale
            (thousand, million, ...) of the lowest three digits of ``n``.
            The entry at that index is appended whenever those digits
            are non-zero.

    Returns:
        The words joined by single spaces. Empty pieces (for example the
        blank entries at index 0 of the lookup tables) are dropped, so a
        value of 0 produces an empty string.
    """
    words = []
    if n >= 1000:
        # Everything above the lowest three digits belongs to the next
        # scale up, so recurse on it with the following group name.
        upper, n = divmod(n, 1000)
        words.append(_standard_number_to_words(upper, digit_group + 1))

    hundreds, below_hundred = divmod(n, 100)
    if n >= 100:
        words.append('%s hundred' % _units[hundreds])
    if below_hundred < len(_units):
        # _units has a direct entry for every value it is long enough to index.
        words.append(_units[below_hundred])
    else:
        tens, ones = divmod(below_hundred, 10)
        words.append(_tens[tens])
        words.append(_units[ones])
    if n > 0:
        # Name the scale only when this three-digit group contributed words.
        words.append(_digit_groups[digit_group])
    return ' '.join(word for word in words if word)
|
||||
|
||||
|
||||
def _number_to_words(n):
|
||||
# Handle special cases first, then go to the standard case:
|
||||
if n >= 1000000000000000000:
|
||||
return str(n) # Too large, just return the digits
|
||||
elif n == 0:
|
||||
return 'zero'
|
||||
elif n % 100 == 0 and n % 1000 != 0 and n < 3000:
|
||||
return _standard_number_to_words(n // 100, 0) + ' hundred'
|
||||
else:
|
||||
return _standard_number_to_words(n, 0)
|
||||
|
||||
|
||||
def _expand_number(m):
|
||||
num = int(m.group(0))
|
||||
if num > 1000 and num < 3000:
|
||||
if num == 2000:
|
||||
return 'two thousand'
|
||||
elif num > 2000 and num < 2010:
|
||||
return 'two thousand ' + _inflect.number_to_words(num % 100)
|
||||
elif num % 100 == 0:
|
||||
return _inflect.number_to_words(num // 100) + ' hundred'
|
||||
else:
|
||||
return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
|
||||
else:
|
||||
return _inflect.number_to_words(num, andword='')
|
||||
return _number_to_words(int(m.group(0)))
|
||||
|
||||
|
||||
def _expand_ordinal(m):
    """Return the ordinal wording for a matched number.

    Args:
        m: Match object whose group 1 is the digit string; it is parsed
            with ``int`` and converted by ``_number_to_words``.

    Returns:
        The cardinal words with their ending swapped for the replacement
        of the first ``_ordinal_suffixes`` entry that matches, or with
        'th' appended when no entry matches.
    """
    cardinal = _number_to_words(int(m.group(1)))
    # First table entry whose suffix ends the cardinal words, if any.
    irregular = next(
        ((ending, ordinal_ending)
         for ending, ordinal_ending in _ordinal_suffixes
         if cardinal.endswith(ending)),
        None,
    )
    if irregular is None:
        return cardinal + 'th'
    ending, ordinal_ending = irregular
    return cardinal[:-len(ending)] + ordinal_ending
|
||||
|
||||
|
||||
def normalize_numbers(text):
|
||||
|
|
Loading…
Reference in New Issue