Tags: python, if-statement, iteration, numeral-system

English numerals to integers (iterative function)


Background

This stems from the kata (challenge) parseInt (reloaded). In theory, "nine hundred ninety-nine thousand nine hundred and ninety-nine" should return 999999. That case works now, but shorter cases like "two thousand" now fail. Note that I first tried recursion but have since resorted to iteration.

The Issue

The conditions for adding to or multiplying the result are poorly defined, as is the case of one number followed by multiple powers, e.g. "nine hundred thousand" in "one million nine hundred thousand".

The Code

# Number words below one hundred.  Only the irregular words and the round tens
# are spelled out; the hyphenated compounds are derived from them just below.
numbers = {
    'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,
    'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9,
    'ten': 10, 'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14,
    'fifteen': 15, 'sixteen': 16, 'seventeen': 17, 'eighteen': 18,
    'nineteen': 19,
    'twenty': 20, 'thirty': 30, 'forty': 40, 'fifty': 50,
    'sixty': 60, 'seventy': 70, 'eighty': 80, 'ninety': 90,
}

# 'twenty-one' ... 'ninety-nine': every round ten joined to every unit 1-9.
numbers.update({
    tens + '-' + unit: numbers[tens] + numbers[unit]
    for tens in ('twenty', 'thirty', 'forty', 'fifty',
                 'sixty', 'seventy', 'eighty', 'ninety')
    for unit in ('one', 'two', 'three', 'four', 'five',
                 'six', 'seven', 'eight', 'nine')
})

# Multiplier words.  Values are written as powers of ten so the exponent can
# be checked at a glance (short scale: the n-th "-illion" is 10 ** (3n + 3)).
# Several entries are alternative spellings of the same value; they are kept
# so that every word accepted before is still accepted.
powers = {
    'hundred': 10 ** 2,
    'thousand': 10 ** 3,
    'million': 10 ** 6,
    'billion': 10 ** 9,
    'trillion': 10 ** 12,  # was missing, so "one trillion" was unrecognised
    'quadrillion': 10 ** 15,
    'quintillion': 10 ** 18,
    'sextillion': 10 ** 21,
    'septillion': 10 ** 24,
    'octillion': 10 ** 27,
    'nonillion': 10 ** 30,
    'decillion': 10 ** 33,
    'undecillion': 10 ** 36,
    'duodecillion': 10 ** 39,
    'tredecillion': 10 ** 42,
    'quattuordecillion': 10 ** 45,
    'quindecillion': 10 ** 48,
    'sexdecillion': 10 ** 51,
    'septemdecillion': 10 ** 54,
    'octodecillion': 10 ** 57,
    'novemdecillion': 10 ** 60,
    'vigintillion': 10 ** 63,
    'unvigintillion': 10 ** 66,
    'vigintunillion': 10 ** 66,
    'duovigintillion': 10 ** 69,
    'vigintiduoillion': 10 ** 69,
    'trevigintillion': 10 ** 72,
    'vigintitrillion': 10 ** 72,
    'quattuorvigintillion': 10 ** 75,
    'vigintiquadrillion': 10 ** 75,
    'quinvigintillion': 10 ** 78,
    'vigintiquintrillion': 10 ** 78,
    'sexvigintillion': 10 ** 81,
    'vigintisextillion': 10 ** 81,
    'septvigintillion': 10 ** 84,
    'vigintiseptillion': 10 ** 84,
    'octovigintillion': 10 ** 87,
    'vigintoctillion': 10 ** 87,
    'nonvigintillion': 10 ** 90,
    'vigintinonillion': 10 ** 90,
    'trigintillion': 10 ** 93,
    'untrigintillion': 10 ** 96,
    'duotrigintillion': 10 ** 99,
    'googol': 10 ** 100,
    'centillion': 10 ** 303,
}

def parse_int(string):
    """Convert an English numeral phrase to an integer (work-in-progress version).

    Words are looked up in the module-level ``numbers`` and ``powers`` tables
    and the word "and" is ignored.  Every number word opens a group, and each
    power word that follows is attached to the most recent group.

    Known faults visible in the code below:

    * an unrecognised word only prints a message; parsing then carries on;
    * when the last remaining group carries a power, its number is added
      without being multiplied by that power, and a zero ``result`` is first
      bumped to 1 -- so "two thousand" yields 3;
    * the merging branches index ``b[0]`` as a list, so they assume the entry
      at the front is never a bare number while other entries remain.

    :param string: numeral words separated by single spaces.
    :return: the accumulated integer.
    """
    result=0
    a=string.split(" ")
    b=[]
    
    # Pass 1: group the words.  A number word starts a new entry; a power word
    # is appended (space-separated) to the entry before it.
    for c in a:
        if c in numbers:
            b.append(c)
        elif c in powers:
            # Needs an existing entry: a power word in first position makes
            # b[-1] fail on the empty list.
            b[-1]+=" "+c
        elif c=="and":
            continue
        else:
            # Reports the problem but does not stop; the word is just dropped.
            print("ERROR: UNRECOGNISED WORD(S).")
    
    # Pass 2: replace the words with values.  A lone number word becomes an
    # int; "number power [power ...]" becomes a list [number, power, ...].
    for d, e in enumerate(b):
        if len(e.split(" "))==1:
            b[d]=numbers[e]
        else:
            b[d]=e.split(" ")
            b[d][0]=numbers[b[d][0]]
            f=1
            while f<len(b[d]):
                b[d][f]=powers[b[d][f]]
                f+=1
    
    # Collapse the first entry's chain of powers into a single combined power
    # so that it has the form [number, power] before the main loop starts.
    if not(isinstance(b[0],int)):
           while len(b[0])>2:
               b[0][1]*=b[0][2]
               b[0].pop(2)
    
    # Pass 3: consume the entries front to back, either folding the front
    # entry into the one after it or adding it to the running result.
    while len(b)>0:
        if len(b)==1:
            if isinstance(b[0],int):
                result+=b[0]
                b.pop(0)
            else:
                # Last entry is [number, power, ...].
                if result==0:
                    result=1
                while len(b[0])>2:
                    b[0][1]*=b[0][2]
                    b[0].pop(2)
                # Only the number is added here; the power in b[0][1] is not
                # applied to it.
                result+=b[0][0]
                b.pop(0)
        else:
            if isinstance(b[1],int):
                # Next entry is a bare number: add the front entry's full
                # value (number * power) onto it.
                b[1]+=b[0][0]*b[0][1]
                b.pop(0)
            else:
                # Next entry is a list: first reduce it to [number, power].
                while len(b[1])>2:
                    b[1][1]*=b[1][2]
                    b[1].pop(2)
                
                if b[0][1]<b[1][1]:
                    # Front entry has the smaller power: fold its value into
                    # the next entry's number so the larger power applies to
                    # the sum.
                    b[1][0]+=b[0][0]*b[0][1]
                    b.pop(0)
                else:
                    # Front entry's power is not smaller: it is complete, so
                    # add its value to the result.
                    result+=b[0][0]*b[0][1]
                    b.pop(0)
    
    return(result)

The Output

Currently, the function returns wrong results: 3 for "two thousand", 11 for "ten thousand", and 500003 for "five hundred thousand three hundred". It would be helpful if it returned the right integer. I will update frequently.


Solution

  • Through trial and error, I finished working on the parse_int() function, so here is the result.

    def _number_words():
        """Return the word -> value table for 0-99, hyphenated compounds included."""
        small = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
                 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen',
                 'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen',
                 'nineteen']
        tens = ['twenty', 'thirty', 'forty', 'fifty',
                'sixty', 'seventy', 'eighty', 'ninety']

        table = {}
        for value, word in enumerate(small):
            table[word] = value
        for position, tens_word in enumerate(tens):
            tens_value = 10 * (position + 2)
            table[tens_word] = tens_value
            # 'twenty-one' ... 'twenty-nine', and likewise for each round ten.
            for unit_value in range(1, 10):
                table[tens_word + '-' + small[unit_value]] = tens_value + unit_value
        return table


    # Built once at import time instead of on every call.
    _NUMBERS = _number_words()

    # Multiplier words, written as powers of ten so each exponent is readable
    # (short scale: the n-th "-illion" is 10 ** (3n + 3)).  Alternative spellings
    # of the same value are kept so every previously accepted word still works.
    _POWERS = {
        'hundred': 10 ** 2,
        'thousand': 10 ** 3,
        'million': 10 ** 6,
        'billion': 10 ** 9,
        'trillion': 10 ** 12,  # was missing, so "one trillion" was rejected
        'quadrillion': 10 ** 15,
        'quintillion': 10 ** 18,
        'sextillion': 10 ** 21,
        'septillion': 10 ** 24,
        'octillion': 10 ** 27,
        'nonillion': 10 ** 30,
        'decillion': 10 ** 33,
        'undecillion': 10 ** 36,
        'duodecillion': 10 ** 39,
        'tredecillion': 10 ** 42,
        'quattuordecillion': 10 ** 45,
        'quindecillion': 10 ** 48,
        'sexdecillion': 10 ** 51,
        'septemdecillion': 10 ** 54,
        'octodecillion': 10 ** 57,
        'novemdecillion': 10 ** 60,
        'vigintillion': 10 ** 63,
        'unvigintillion': 10 ** 66,
        'vigintunillion': 10 ** 66,
        'duovigintillion': 10 ** 69,
        'vigintiduoillion': 10 ** 69,
        'trevigintillion': 10 ** 72,
        'vigintitrillion': 10 ** 72,
        'quattuorvigintillion': 10 ** 75,
        'vigintiquadrillion': 10 ** 75,
        'quinvigintillion': 10 ** 78,
        'vigintiquintrillion': 10 ** 78,
        'sexvigintillion': 10 ** 81,
        'vigintisextillion': 10 ** 81,
        'septvigintillion': 10 ** 84,
        'vigintiseptillion': 10 ** 84,
        'octovigintillion': 10 ** 87,
        'vigintoctillion': 10 ** 87,
        'nonvigintillion': 10 ** 90,
        'vigintinonillion': 10 ** 90,
        'trigintillion': 10 ** 93,
        'untrigintillion': 10 ** 96,
        'duotrigintillion': 10 ** 99,
        'googol': 10 ** 100,
        'centillion': 10 ** 303,
    }


    def parse_int(string):
        """Convert an English numeral phrase to an integer.

        The phrase is read as a sequence of terms, each a number followed by
        zero or more power words ("nine hundred", "ninety-nine thousand",
        "five hundred thousand").  A term whose combined power is smaller than
        that of the term after it is absorbed into that term's number, so
        "nine hundred ninety-nine thousand" becomes 999 * 1000; every other
        term is added to the total as it stands.

        Matching is case-insensitive, any run of whitespace separates words,
        and "and" is ignored.  Adjacent number words are summed, so the
        un-hyphenated "twenty one" is read as 21, and a power word with no
        number before it counts as one of that power ("hundred" is 100).

        :param string: the numeral phrase, e.g. "two hundred and forty-six".
        :return: the integer value, or ``None`` (after printing an error
            message) when the phrase contains an unrecognised word or no
            numeral words at all.
        """
        terms = []               # [value, scale] pairs in reading order
        extending_number = False  # previous word was a number word

        for word in string.lower().split():
            if word == "and":
                continue
            if word in _NUMBERS:
                if extending_number:
                    # "twenty" "one": same number, not a new term.
                    terms[-1][0] += _NUMBERS[word]
                else:
                    terms.append([_NUMBERS[word], 1])
                extending_number = True
            elif word in _POWERS:
                if not terms:
                    # Nothing to multiply yet: treat as "one <power>".
                    terms.append([1, 1])
                # Chained powers multiply: "hundred thousand" -> 100 * 1000.
                terms[-1][1] *= _POWERS[word]
                extending_number = False
            else:
                print("ERROR: UNRECOGNISED WORD(S).")
                return None

        if not terms:
            print("ERROR: UNRECOGNISED WORD(S).")
            return None

        result = 0
        carried = 0  # value of smaller-scale terms awaiting a larger scale
        last_index = len(terms) - 1
        for index, (value, scale) in enumerate(terms):
            value += carried
            carried = 0
            if index < last_index and scale < terms[index + 1][1]:
                # e.g. "nine hundred" before "ninety-nine thousand": the 900
                # belongs inside the thousands group.
                carried = value * scale
            else:
                result += value * scale

        return result