javascript string

Pattern Matching and number extraction


I created the following code to extract numerical information from a user-provided string, which specifies the level or floor in a building. The goal is to accurately extract the numerical value from the input. However, the current implementation does not handle hyphenated numbers correctly. For instance, "twenty-third" is incorrectly resolved as 20 instead of 23.

/**
 * Extracts the level/floor number mentioned in a free-form string.
 *
 * Digits ("level 5", "21st") and English cardinal/ordinal words from one to
 * fifty are recognised; the first match that resolves to a number wins.
 *
 * Known limitation (the subject of this question): the pattern lists "twenty",
 * "thirty", "forty" ahead of their hyphenated forms and `\b` is satisfied right
 * before a hyphen, so "twenty-third" is captured as "twenty" and yields 20.
 *
 * @param {string} input - Text such as "level 5" or "third floor".
 * @returns {number|null} The first level found, or null when there is none.
 */
function extractLevelFromString(input) {
    // Matching is done on a lower-cased copy so the lookup keys line up.
    const lowered = input.toLowerCase();

    // Cardinal and ordinal spellings share the same numeric value.
    const numberWords = {
        'one': 1, 'first': 1,
        'two': 2, 'second': 2,
        'three': 3, 'third': 3,
        'four': 4, 'fourth': 4,
        'five': 5, 'fifth': 5,
        'six': 6, 'sixth': 6,
        'seven': 7, 'seventh': 7,
        'eight': 8, 'eighth': 8,
        'nine': 9, 'ninth': 9,
        'ten': 10, 'tenth': 10,
        'eleven': 11, 'eleventh': 11,
        'twelve': 12, 'twelfth': 12,
        'thirteen': 13, 'thirteenth': 13,
        'fourteen': 14, 'fourteenth': 14,
        'fifteen': 15, 'fifteenth': 15,
        'sixteen': 16, 'sixteenth': 16,
        'seventeen': 17, 'seventeenth': 17,
        'eighteen': 18, 'eighteenth': 18,
        'nineteen': 19, 'nineteenth': 19,
        'twenty': 20, 'twentieth': 20,
        'twenty-one': 21, 'twenty-first': 21,
        'twenty-two': 22, 'twenty-second': 22,
        'twenty-three': 23, 'twenty-third': 23,
        'twenty-four': 24, 'twenty-fourth': 24,
        'twenty-five': 25, 'twenty-fifth': 25,
        'twenty-six': 26, 'twenty-sixth': 26,
        'twenty-seven': 27, 'twenty-seventh': 27,
        'twenty-eight': 28, 'twenty-eighth': 28,
        'twenty-nine': 29, 'twenty-ninth': 29,
        'thirty': 30, 'thirtieth': 30,
        'thirty-one': 31, 'thirty-first': 31,
        'thirty-two': 32, 'thirty-second': 32,
        'thirty-three': 33, 'thirty-third': 33,
        'thirty-four': 34, 'thirty-fourth': 34,
        'thirty-five': 35, 'thirty-fifth': 35,
        'thirty-six': 36, 'thirty-sixth': 36,
        'thirty-seven': 37, 'thirty-seventh': 37,
        'thirty-eight': 38, 'thirty-eighth': 38,
        'thirty-nine': 39, 'thirty-ninth': 39,
        'forty': 40, 'fortieth': 40,
        'forty-one': 41, 'forty-first': 41,
        'forty-two': 42, 'forty-second': 42,
        'forty-three': 43, 'forty-third': 43,
        'forty-four': 44, 'forty-fourth': 44,
        'forty-five': 45, 'forty-fifth': 45,
        'forty-six': 46, 'forty-sixth': 46,
        'forty-seven': 47, 'forty-seventh': 47,
        'forty-eight': 48, 'forty-eighth': 48,
        'forty-nine': 49, 'forty-ninth': 49,
        'fifty': 50, 'fiftieth': 50,
    };

    // Group 1: optional lead-in word; group 2: digits or a number word.
    // A trailing st/nd/rd/th is consumed but kept out of group 2.
    const levelPattern = /\b(level|floor|on|at)?\s*(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|fifty|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth)(?:st|nd|rd|th)?\b/gi;

    for (const found of lowered.matchAll(levelPattern)) {
        const token = found[2];

        if (isNaN(token)) {
            // Not digits: resolve the word through the lookup table.
            const mapped = numberWords[token];
            if (mapped) {
                return mapped;
            }
            continue;
        }

        // Digit run: convert as base 10.
        return parseInt(token, 10);
    }

    // Nothing in the input resolved to a level.
    return null;
}

I tried this using regex pattern matching and expected it to resolve the number words in the input string to their numeric values.


Solution

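  • One option is to move the short number words (i.e. one, two, ..., ten, twenty, ...) to the end of the alternation. JavaScript tries the alternatives from left to right, and `\b` is satisfied right before a hyphen, so a short word such as "twenty" listed ahead of "twenty-third" wins and the rest of the word is ignored. With the short words last, your regular expression looks like this: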

    // Alternatives are tried left to right and `\b` matches right before a hyphen,
    // so every word that is a prefix of a hyphenated form (twenty, thirty, forty,
    // fifty) and the single-digit words are listed last.
    const levelRegex = /\b(level|floor|on|at)?\s*(\d+|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth|one|two|three|four|five|six|seven|eight|nine|ten|twenty|thirty|forty|fifty)(?:st|nd|rd|th)?\b/gi;
    

    With this ordering, "twenty-third" resolves to 23. The complete code:

    // Lookup table from English number words (cardinal and ordinal, one to fifty)
    // to their numeric value. Keys are lower-case; hyphenated forms use "-".
    const wordToNumberMap = {
        "one": 1, "first": 1,
        "two": 2, "second": 2,
        "three": 3, "third": 3,
        "four": 4, "fourth": 4,
        "five": 5, "fifth": 5,
        "six": 6, "sixth": 6,
        "seven": 7, "seventh": 7,
        "eight": 8, "eighth": 8,
        "nine": 9, "ninth": 9,
        "ten": 10, "tenth": 10,
        "eleven": 11, "eleventh": 11,
        "twelve": 12, "twelfth": 12,
        "thirteen": 13, "thirteenth": 13,
        "fourteen": 14, "fourteenth": 14,
        "fifteen": 15, "fifteenth": 15,
        "sixteen": 16, "sixteenth": 16,
        "seventeen": 17, "seventeenth": 17,
        "eighteen": 18, "eighteenth": 18,
        "nineteen": 19, "nineteenth": 19,
        "twenty": 20, "twentieth": 20,
        "twenty-one": 21, "twenty-first": 21,
        "twenty-two": 22, "twenty-second": 22,
        "twenty-three": 23, "twenty-third": 23,
        "twenty-four": 24, "twenty-fourth": 24,
        "twenty-five": 25, "twenty-fifth": 25,
        "twenty-six": 26, "twenty-sixth": 26,
        "twenty-seven": 27, "twenty-seventh": 27,
        "twenty-eight": 28, "twenty-eighth": 28,
        "twenty-nine": 29, "twenty-ninth": 29,
        "thirty": 30, "thirtieth": 30,
        "thirty-one": 31, "thirty-first": 31,
        "thirty-two": 32, "thirty-second": 32,
        "thirty-three": 33, "thirty-third": 33,
        "thirty-four": 34, "thirty-fourth": 34,
        "thirty-five": 35, "thirty-fifth": 35,
        "thirty-six": 36, "thirty-sixth": 36,
        "thirty-seven": 37, "thirty-seventh": 37,
        "thirty-eight": 38, "thirty-eighth": 38,
        "thirty-nine": 39, "thirty-ninth": 39,
        "forty": 40, "fortieth": 40,
        "forty-one": 41, "forty-first": 41,
        "forty-two": 42, "forty-second": 42,
        "forty-three": 43, "forty-third": 43,
        "forty-four": 44, "forty-fourth": 44,
        "forty-five": 45, "forty-fifth": 45,
        "forty-six": 46, "forty-sixth": 46,
        "forty-seven": 47, "forty-seventh": 47,
        "forty-eight": 48, "forty-eighth": 48,
        "forty-nine": 49, "forty-ninth": 49,
        "fifty": 50, "fiftieth": 50
    };


    // Group 1: optional lead-in word; group 2: digits or a number word.
    // Alternatives are tried left to right and `\b` matches right before a hyphen,
    // so every word that is a prefix of a hyphenated form (twenty, thirty, forty,
    // fifty) and the single-digit words are listed last. Otherwise "thirty-first"
    // would be captured as "thirty".
    const levelRegex = /\b(level|floor|on|at)?\s*(\d+|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty-one|twenty-two|twenty-three|twenty-four|twenty-five|twenty-six|twenty-seven|twenty-eight|twenty-nine|thirty-one|thirty-two|thirty-three|thirty-four|thirty-five|thirty-six|thirty-seven|thirty-eight|thirty-nine|forty-one|forty-two|forty-three|forty-four|forty-five|forty-six|forty-seven|forty-eight|forty-nine|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|twenty-first|twenty-second|twenty-third|twenty-fourth|twenty-fifth|twenty-sixth|twenty-seventh|twenty-eighth|twenty-ninth|thirtieth|thirty-first|thirty-second|thirty-third|thirty-fourth|thirty-fifth|thirty-sixth|thirty-seventh|thirty-eighth|thirty-ninth|fortieth|forty-first|forty-second|forty-third|forty-fourth|forty-fifth|forty-sixth|forty-seventh|forty-eighth|forty-ninth|fiftieth|one|two|three|four|five|six|seven|eight|nine|ten|twenty|thirty|forty|fifty)(?:st|nd|rd|th)?\b/gi;
    
    /**
     * Returns the first level/floor number found in the given text.
     *
     * Relies on the surrounding `levelRegex` (capture group 2 holds the digits
     * or the number word) and `wordToNumberMap` (word -> number lookup).
     *
     * @param {string} input - Free-form text, e.g. "Twenty-Third".
     * @returns {number|null} The resolved number, or null when nothing matches.
     */
    function extractLevelFromString(input) {
        // The lookup keys are lower-case, so match against a lower-cased copy.
        const lowered = input.toLowerCase();

        for (const [, , token] of lowered.matchAll(levelRegex)) {
            if (isNaN(token)) {
                // Word form: only accept it when the table knows the word.
                const mapped = wordToNumberMap[token];
                if (mapped) {
                    return mapped;
                }
                continue;
            }

            // Digit form: convert as base 10.
            return parseInt(token, 10);
        }

        // No candidate resolved to a number.
        return null;
    }
    
    // Demo: mixed-case hyphenated input; prints 23, then 25.
    for (const sample of ['Twenty-Third', 'Twenty-Five']) {
        console.log(extractLevelFromString(sample));
    }