I created the following code to extract numerical information from a user-provided string, which specifies the level or floor in a building. The goal is to accurately extract the numerical value from the input. However, the current implementation does not handle hyphenated numbers correctly. For instance, "twenty-third" is incorrectly resolved as 20 instead of 23.
/**
 * Extracts a building level/floor number from free-form text.
 *
 * Recognises digit sequences (optionally followed by an ordinal suffix such
 * as "3rd") and English cardinal/ordinal words from one/first up to
 * fifty/fiftieth, including hyphenated forms such as "twenty-third".
 * The first recognised number in the string wins.
 *
 * @param {string} input - Text supplied by the user.
 * @returns {number|null} The level, or null when no level is found or
 *   `input` is not a string.
 */
function extractLevelFromString(input) {
  // Non-string input (null, undefined, numbers, ...) has no text to scan.
  if (typeof input !== "string") {
    return null;
  }

  // Normalize the input string so the lowercase map keys match.
  const normalizedInput = input.toLowerCase();

  const wordToNumberMap = {
    "one": 1, "first": 1,
    "two": 2, "second": 2,
    "three": 3, "third": 3,
    "four": 4, "fourth": 4,
    "five": 5, "fifth": 5,
    "six": 6, "sixth": 6,
    "seven": 7, "seventh": 7,
    "eight": 8, "eighth": 8,
    "nine": 9, "ninth": 9,
    "ten": 10, "tenth": 10,
    "eleven": 11, "eleventh": 11,
    "twelve": 12, "twelfth": 12,
    "thirteen": 13, "thirteenth": 13,
    "fourteen": 14, "fourteenth": 14,
    "fifteen": 15, "fifteenth": 15,
    "sixteen": 16, "sixteenth": 16,
    "seventeen": 17, "seventeenth": 17,
    "eighteen": 18, "eighteenth": 18,
    "nineteen": 19, "nineteenth": 19,
    "twenty": 20, "twentieth": 20,
    "twenty-one": 21, "twenty-first": 21,
    "twenty-two": 22, "twenty-second": 22,
    "twenty-three": 23, "twenty-third": 23,
    "twenty-four": 24, "twenty-fourth": 24,
    "twenty-five": 25, "twenty-fifth": 25,
    "twenty-six": 26, "twenty-sixth": 26,
    "twenty-seven": 27, "twenty-seventh": 27,
    "twenty-eight": 28, "twenty-eighth": 28,
    "twenty-nine": 29, "twenty-ninth": 29,
    "thirty": 30, "thirtieth": 30,
    "thirty-one": 31, "thirty-first": 31,
    "thirty-two": 32, "thirty-second": 32,
    "thirty-three": 33, "thirty-third": 33,
    "thirty-four": 34, "thirty-fourth": 34,
    "thirty-five": 35, "thirty-fifth": 35,
    "thirty-six": 36, "thirty-sixth": 36,
    "thirty-seven": 37, "thirty-seventh": 37,
    "thirty-eight": 38, "thirty-eighth": 38,
    "thirty-nine": 39, "thirty-ninth": 39,
    "forty": 40, "fortieth": 40,
    "forty-one": 41, "forty-first": 41,
    "forty-two": 42, "forty-second": 42,
    "forty-three": 43, "forty-third": 43,
    "forty-four": 44, "forty-fourth": 44,
    "forty-five": 45, "forty-fifth": 45,
    "forty-six": 46, "forty-sixth": 46,
    "forty-seven": 47, "forty-seventh": 47,
    "forty-eight": 48, "forty-eighth": 48,
    "forty-nine": 49, "forty-ninth": 49,
    "fifty": 50, "fiftieth": 50
  };

  // A regex alternation takes the first alternative that lets the whole
  // pattern succeed. Because "-" is a word boundary, "twenty" followed by
  // "-third" already satisfies the trailing \b, so a short word listed before
  // its hyphenated compound would win. Sorting longest-first guarantees that
  // every compound is tried before any word that is a prefix of it.
  const numberWords = Object.keys(wordToNumberMap)
    .sort((a, b) => b.length - a.length)
    .join("|");
  const levelRegex = new RegExp(
    `\\b(level|floor|on|at)?\\s*(\\d+|${numberWords})(?:st|nd|rd|th)?\\b`,
    "g"
  );

  for (const match of normalizedInput.matchAll(levelRegex)) {
    const levelCandidate = match[2]; // The number part, digits or a word.

    // Digits: parse directly.
    if (/^\d+$/.test(levelCandidate)) {
      return Number.parseInt(levelCandidate, 10);
    }

    // Word: look the value up in the table.
    const level = wordToNumberMap[levelCandidate];
    if (typeof level === "number") {
      return level;
    }
  }

  // Return null if no level found.
  return null;
}
I tried this using regex pattern matching and was expecting the resolution of numbers from the input string.
One option is to move the shorter words that are also the start of a longer alternative (i.e. `one`, `two`, ..., `twenty`) to the end of the regex alternation, because the regex engine uses the first alternative that matches. Your regex will then look like this:
// Hyphenated compounds come first: "-" is a word boundary, so a bare
// "twenty", "thirty" or "forty" listed earlier would match and stop before
// the "-third" / "-one" part is ever tried.
const levelRegex = /\b(level|floor|on|at)?\s*(\d+|(?:twenty|thirty|forty)-(?:first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|one|two|three|four|five|six|seven|eight|nine)|twentieth|thirtieth|fortieth|fiftieth|twenty|thirty|forty|fifty|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|one|two|three|four|five|six|seven|eight|nine|ten)(?:st|nd|rd|th)?\b/gi;
Then this will resolve the "twenty-third" to 23.
// Lookup table from lowercase English number words (cardinal and ordinal,
// 1 through 50) to their integer value. Compound forms are hyphenated,
// e.g. "twenty-three" / "twenty-third".
const wordToNumberMap = (() => {
  // [cardinal, ordinal] pairs; the array position determines the value.
  const units = [
    ["one", "first"], ["two", "second"], ["three", "third"],
    ["four", "fourth"], ["five", "fifth"], ["six", "sixth"],
    ["seven", "seventh"], ["eight", "eighth"], ["nine", "ninth"],
  ];
  const tenToNineteen = [
    ["ten", "tenth"], ["eleven", "eleventh"], ["twelve", "twelfth"],
    ["thirteen", "thirteenth"], ["fourteen", "fourteenth"],
    ["fifteen", "fifteenth"], ["sixteen", "sixteenth"],
    ["seventeen", "seventeenth"], ["eighteen", "eighteenth"],
    ["nineteen", "nineteenth"],
  ];
  const tens = [
    ["twenty", "twentieth"], ["thirty", "thirtieth"],
    ["forty", "fortieth"], ["fifty", "fiftieth"],
  ];

  const table = {};
  const add = (cardinal, ordinal, value) => {
    table[cardinal] = value;
    table[ordinal] = value;
  };

  units.forEach(([cardinal, ordinal], i) => add(cardinal, ordinal, i + 1));
  tenToNineteen.forEach(([cardinal, ordinal], i) => add(cardinal, ordinal, i + 10));
  tens.forEach(([tensCardinal, tensOrdinal], i) => {
    const base = (i + 2) * 10;
    add(tensCardinal, tensOrdinal, base);
    // The table stops at fifty, so only 20, 30 and 40 get compound entries.
    if (base < 50) {
      units.forEach(([cardinal, ordinal], j) => {
        add(`${tensCardinal}-${cardinal}`, `${tensCardinal}-${ordinal}`, base + j + 1);
      });
    }
  });

  return table;
})();
// Matches an optional lead-in word (level/floor/on/at) followed by a number
// written as digits or as an English cardinal/ordinal word up to fifty.
// Capture group 2 holds the number text.
//
// Hyphenated compounds come first: "-" is a word boundary, so a bare
// "twenty", "thirty" or "forty" listed earlier would match and stop before
// the "-third" / "-one" part is ever tried.
const levelRegex = /\b(level|floor|on|at)?\s*(\d+|(?:twenty|thirty|forty)-(?:first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|one|two|three|four|five|six|seven|eight|nine)|twentieth|thirtieth|fortieth|fiftieth|twenty|thirty|forty|fifty|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|one|two|three|four|five|six|seven|eight|nine|ten)(?:st|nd|rd|th)?\b/gi;
/**
 * Extracts a building level/floor number from free-form text.
 *
 * Scans the lowercased input with the module-level `levelRegex` and returns
 * the value of the first candidate (capture group 2) that is either a digit
 * sequence or a key of the module-level `wordToNumberMap`.
 *
 * @param {string} input - Text supplied by the user.
 * @returns {number|null} The level, or null when no level is found or
 *   `input` is not a string.
 */
function extractLevelFromString(input) {
  // Non-string input (null, undefined, numbers, ...) has no text to scan;
  // report "not found" instead of throwing on `.toLowerCase()`.
  if (typeof input !== "string") {
    return null;
  }

  // Normalize the input string so the lowercase map keys match.
  const normalizedInput = input.toLowerCase();

  for (const match of normalizedInput.matchAll(levelRegex)) {
    const levelCandidate = match[2]; // The number part, digits or a word.

    // Digits: parse directly (explicit test rather than the coercing isNaN).
    if (/^\d+$/.test(levelCandidate)) {
      return Number.parseInt(levelCandidate, 10);
    }

    // Word: look the value up in the table.
    const level = wordToNumberMap[levelCandidate];
    if (typeof level === "number") {
      return level;
    }
  }

  // Return null if no level found.
  return null;
}
// Demo: mixed-case hyphenated ordinal and cardinal inputs (prints 23, then 25).
for (const sample of ['Twenty-Third', 'Twenty-Five']) {
  console.log(extractLevelFromString(sample));
}