I am currently trying to make my own little programming language for the first time. I am, as of now, creating the basic lexer. I am trying to allow floating point values in my code, but alas, it gets split into an identifier and a float value (see output below)
Question: How do I fix my lexer to properly handle floats?
Output: let value = 7.24 const pi = 3.14
Code:
function lexer(input) {
const tokens = [];
const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
const alphaNumericRegex = /[a-zA-Z0-9]/;
const digitRegex = /\d/;
let current = 0;
let length = input.length;
while (current < length) {
let char = input[current];
if (char === ' ' || char === '\n') {
current++;
continue;
}
if (alphaNumericRegex.test(char)) {
let wordStart = current;
while (alphaNumericRegex.test(char)) {
char = input[++current];
}
let word = input.slice(wordStart, current);
if (keywords.has(word)) {
tokens.push({ type: 'keyword', value: word });
} else {
tokens.push({ type: 'identifier', value: word });
}
continue;
}
if (digitRegex.test(char) || char === '.') {
let numStart = current;
let hasDecimal = false;
while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
if (char === '.') {
hasDecimal = true;
}
char = input[++current];
}
let numStr = input.slice(numStart, current);
let num = parseFloat(numStr);
if (isNaN(num)) {
throw new SyntaxError("Invalid number");
}
if (Number.isInteger(num)) {
tokens.push({ type: 'number', value: num });
} else {
tokens.push({ type: 'number', value: parseFloat(numStr) });
}
continue;
}
if (char === '"') {
let strStart = ++current;
while (input[current] !== '"') {
if (++current >= length) throw new SyntaxError("Unterminated string literal");
}
let str = input.slice(strStart, current++);
tokens.push({ type: 'string', value: str });
continue;
}
if (char === "'") {
let charValue = input[++current];
if (input[++current] === "'") {
tokens.push({ type: 'char', value: charValue });
current++;
} else {
throw new SyntaxError("Invalid character literal");
}
continue;
}
if (char === '=') {
tokens.push({ type: 'assign' });
current++;
continue;
}
if (char === ';') {
tokens.push({ type: 'semicolon' });
current++;
continue;
}
if (char === '.') {
tokens.push({ type: 'dot' });
current++;
continue;
}
current++;
}
return tokens;
}
const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;
console.log(JSON.stringify(lexer(code), null, 2));
I'm still somewhat new to JavaScript and completely new to lexers, so I tried getting help from ai tools such as ChatGPT and AskCodi. They attempted to fix the problem and any changes I made from their recommendations made no difference.
all I did was change alphaNumeric from /[a-zA-Z0-9]/
to /[a-zA-Z0-9-.]/
in an attempt to include the dot when parsing a number and funny enough, it worked :D
window.onload=_=>document.getElementsByClassName('as-console-wrapper')[0].style.maxHeight='100%';
//this above just to make the console seen in its full space instead of half
function lexer(input) {
const tokens = [];
const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']);
const alphaNumericRegex = /[a-zA-Z0-9-.]/;
const digitRegex = /\d/;
let current = 0;
let length = input.length;
while (current < length) {
let char = input[current];
if (char === ' ' || char === '\n') {
current++;
continue;
}
if (alphaNumericRegex.test(char)) {
let wordStart = current;
while (alphaNumericRegex.test(char)) {
char = input[++current];
}
let word = input.slice(wordStart, current);
if (keywords.has(word)) {
tokens.push({ type: 'keyword', value: word });
} else {
tokens.push({ type: 'identifier', value: word });
}
continue;
}
if (digitRegex.test(char) || char === '.') {
let numStart = current;
let hasDecimal = false;
while (digitRegex.test(char) || (!hasDecimal && char === '.')) {
if (char === '.') {
hasDecimal = true;
}
char = input[++current];
}
let numStr = input.slice(numStart, current);
let num = parseFloat(numStr);
if (isNaN(num)) {
throw new SyntaxError("Invalid number");
}
if (Number.isInteger(num)) {
tokens.push({ type: 'number', value: num });
} else {
tokens.push({ type: 'number', value: parseFloat(numStr) });
}
continue;
}
if (char === '"') {
let strStart = ++current;
while (input[current] !== '"') {
if (++current >= length) throw new SyntaxError("Unterminated string literal");
}
let str = input.slice(strStart, current++);
tokens.push({ type: 'string', value: str });
continue;
}
if (char === "'") {
let charValue = input[++current];
if (input[++current] === "'") {
tokens.push({ type: 'char', value: charValue });
current++;
} else {
throw new SyntaxError("Invalid character literal");
}
continue;
}
if (char === '=') {
tokens.push({ type: 'assign' });
current++;
continue;
}
if (char === ';') {
tokens.push({ type: 'semicolon' });
current++;
continue;
}
if (char === '.') {
tokens.push({ type: 'dot' });
current++;
continue;
}
current++;
}
return tokens;
}
const code = `let value = 7.24;
var count = 5;
const pi = 3.14;
bool isTrue = true;
string message = "Hello";
char initial = 'A';`;
console.log(JSON.stringify(lexer(code), null, 2));