I need to make a recursive descent parser that follows the grammar
<program> → <statements>
<statements>→ <statement> | <statement><semi_colon><statements>
<statement> → <ident><assignment_op><expression>
<expression> → <term><term_tail>
<term_tail> → <add_op><term><term_tail> | ε
<term> → <factor> <factor_tail>
<factor_tail> → <mult_op><factor><factor_tail> | ε
<factor> → <left_paren><expression><right_paren> | <ident> | <const>
<const> → any decimal numbers
<ident> → any names conforming to C identifier rules
<assignment_op> → :=
<semi_colon> → ;
<add_operator> → + | -
<mult_operator> → * | /
<left_paren> → (
<right_paren> →)
I have made a lexer that tokenizes input into token type, token string, and token value.
public Token(int type, String token_string, Object value) {
this.type = type;
this.token_string = token_string;
this.value = value;
}
public class TokenType {
public static final int NUMBER=1;
public static final int LEFT_PAR=2;
public static final int RIGHT_PAR=3;
public static final int PLUS=4;
public static final int MINUS=5;
public static final int STAR=6;
public static final int SLASH=7;
public static final int IDENT=8;
public static final int ASSIGN=9;
public static final int SEMI_COLON=10;
}
I am supposed to parse inputs like
operator1 := 200 + 100 *(100 - 200);
operator2 := operator1 + 300 and return the operator's values
However, I am not sure how I could make the parser.
You add END_OF_TOKEN
to TokenType
.
public class TokenType {
public static final int NUMBER = 1;
.....
public static final int END_OF_TOKEN = 11;
}
You define a Tokenizer
class. It returns a Token
object by calling get()
. For simplicity here it returns the Token
passed to the constructor.
public class Tokenizer {
Token[] tokens;
int index = 0;
Tokenizer(Token... tokens) {
this.tokens = tokens;
}
public Token get() {
if (index < tokens.length)
return tokens[index++];
else
return new Token(TokenType.END_OF_TOKEN, "End of token", null);
}
}
You define a Parser
.
public class Parser {
Tokenizer tokenizer;
Token token;
Map<String, Double> variables = new HashMap<>();
Parser(Tokenizer tokenizer) {
this.tokenizer = tokenizer;
}
double factor() {
if (token.type == TokenType.LEFT_PAR) {
token = tokenizer.get();
double e = expression();
if (token.type != TokenType.RIGHT_PAR)
throw new RuntimeException("')' expected");
token = tokenizer.get();
return e;
} else if (token.type == TokenType.IDENT) {
String name = token.token_string;
token = tokenizer.get();
if (!variables.containsKey(name))
throw new RuntimeException("variable '" + name + "' undefined");
return variables.get(name);
} else if (token.type == TokenType.NUMBER) {
double value = Double.parseDouble(token.token_string);
token = tokenizer.get();
return value;
} else
throw new RuntimeException("unknown token '" + token.token_string + "'");
}
double term() {
double value = factor();
while (true) {
if (token.type == TokenType.STAR) {
token = tokenizer.get();
value *= factor();
} else if (token.type == TokenType.SLASH) {
token = tokenizer.get();
value /= factor();
} else
break;
}
return value;
}
double expression() {
double value = term();
while (true) {
if (token.type == TokenType.PLUS) {
token = tokenizer.get();
value += term();
} else if (token.type == TokenType.MINUS) {
token = tokenizer.get();
value -= term();
} else
break;
}
return value;
}
void statement() {
if (token.type != TokenType.IDENT)
throw new RuntimeException("identifier expected");
String name = token.token_string;
token = tokenizer.get();
if (token.type != TokenType.ASSIGN)
throw new RuntimeException("':=' expected");
token = tokenizer.get();
double value = expression();
variables.put(name, value);
}
void statements() {
while (true) {
statement();
if (token.type != TokenType.SEMI_COLON)
break;
token = tokenizer.get();
}
}
public Map<String, Double> parse() {
token = tokenizer.get();
statements();
if (token.type != TokenType.END_OF_TOKEN)
throw new RuntimeException("extra token '" + token.token_string + "'");
return variables;
}
}
Test:
@Test
public void testParser() {
Tokenizer tokenizer = new Tokenizer(
new Token(TokenType.IDENT, "operator1", null),
new Token(TokenType.ASSIGN, null, null),
new Token(TokenType.NUMBER, "200", null),
new Token(TokenType.PLUS, null, null),
new Token(TokenType.NUMBER, "100", null),
new Token(TokenType.STAR, null, null),
new Token(TokenType.LEFT_PAR, null, null),
new Token(TokenType.NUMBER, "100", null),
new Token(TokenType.MINUS, null, null),
new Token(TokenType.NUMBER, "200", null),
new Token(TokenType.RIGHT_PAR, null, null),
new Token(TokenType.SEMI_COLON, null, null),
new Token(TokenType.IDENT, "operator2", null),
new Token(TokenType.ASSIGN, null, null),
new Token(TokenType.IDENT, "operator1", null),
new Token(TokenType.PLUS, null, null),
new Token(TokenType.NUMBER, "300", null));
Parser parser = new Parser(tokenizer);
Map<String, Double> variables = parser.parse();
System.out.println(variables);
}
output:
{operator2=-9500.0, operator1=-9800.0}