I'm writing interpreter for simple lambda calculus based language in C. EBNF for language is
S ::= E
E ::= 'fn' var '->' E | T {'+' T} | T {'-' T}
T ::= F {'*' F} | F {'/' F}
F ::= P {P}
P ::= var | number | '(' E ')'
I've already written parser for same grammar but without 4-th production rule. But how I could deal with {P}
?
Of course, I can just check lookahead token, and if it's var, number or '(' then call procedure for parsing P (say parse_primitive
), but in this case I will have same extra check of lookahead in parse_primitive
, so i definitely doing something wrong.
EDIT: my code now looks like this:
Ast *parse_expr(Lexer *lexer) {
if (match_token(lexer, TOK_KW_FN) {
expect_token(lexer, TOK_VAR);
char *lexeme = get_last_lexeme(lexer);
Ast *var = make_var(lexeme);
expect_token(lexer, TOK_ARROW);
Ast *body = parse_expr(lexer);
return make_ast(var, body);
} else {
Ast *expr = parse_term(lexer);
for (;;) {
int op;
if (match_token(lexer, TOK_PLUS)) {
op = OP_ADD;
} else if (match_token(lexer, TOK_MINUS)) {
op = OP_SUB;
} else {
return expr;
}
Ast *term = parse_term(lexer);
expr = make_binop(op, expr, term);
}
}
}
Ast *parse_term(Lexer *lexer) {
Ast *term = parse_factor(lexer);
for (;;) {
int op;
if (match_token(lexer, TOK_STAR)) {
op = OP_MUL;
} else if (match_token(lexer, TOK_SLASH)) {
op = OP_DIV;
} else {
return term;
}
Ast *factor = parse_factor(lexer);
term = make_binop(op, term, factor);
}
}
Ast *parse_factor(Lexer *lexer) {
Ast *factor = parse_primitive(lexer);
for (;;) {
int tok = lexer_peek(lexer);
if (tok != TOK_LPAR && tok != TOK_VAR && tok != TOK_NUMBER) { /* i want to remove this check as if is actually done by parse_primitive */
return factor;
}
Ast *primitive= parse_primitive(lexer);
factor = make_app(factor, primitive);
}
}
Ast *parse_primitive(Lexer *lexer) {
if (lexer_match(lexer, TOK_LPAR)) {
Ast *expr = parse_expr(lexer);
if (!lexer_match(lexer, TOK_RPAR)) {
parser_error("expected ')'");
}
return expr;
} else if (lexer_match(lexer, TOK_NUMBER)) {
char *lexeme = get_last_lexeme(lexer);
return make_number(lexeme);
} else if (lexer_match(lexer, TOK_VAR)) {
char *lexeme = get_last_lexeme(lexer);
return make_var(lexeme);
}
parser_error("expected '(', number or var");
}
You could add an argument to parse_primitive
to indicate whether the primitive is required or is optional. In case it is optional, don't raise an error when it is not matched, but return NULL
:
Ast *parse_primitive(Lexer *lexer, int mandatory) {
if (lexer_match(lexer, TOK_LPAR)) {
Ast *expr = parse_expr(lexer);
if (!lexer_match(lexer, TOK_RPAR)) {
// Here we unconditionally raise an error, as the opening bracket
// made the closing bracket mandatory.
parser_error("expected ')'");
}
return expr;
} else if (lexer_match(lexer, TOK_NUMBER)) {
char *lexeme = get_last_lexeme(lexer);
return make_number(lexeme);
} else if (lexer_match(lexer, TOK_VAR)) {
char *lexeme = get_last_lexeme(lexer);
return make_var(lexeme);
}
// Conditionally raise an error
if (mandatory) {
parser_error("expected '(', number or var");
}
return NULL;
}
Now your parse_factor
function can look like this:
Ast *parse_factor(Lexer *lexer) {
Ast *factor = parse_primitive(lexer, 1); // Mandatory
for (;;) {
Ast *primitive = parse_primitive(lexer, 0); // Not mandatory
if (!primitive) {
return factor;
}
factor = make_app(factor, primitive);
}
}
...then create two versions of the function:
Ast *parse_if_primitive(Lexer *lexer) {
if (lexer_match(lexer, TOK_LPAR)) {
Ast *expr = parse_expr(lexer);
if (!lexer_match(lexer, TOK_RPAR)) {
parser_error("expected ')'");
}
return expr;
} else if (lexer_match(lexer, TOK_NUMBER)) {
char *lexeme = get_last_lexeme(lexer);
return make_number(lexeme);
} else if (lexer_match(lexer, TOK_VAR)) {
char *lexeme = get_last_lexeme(lexer);
return make_var(lexeme);
}
return NULL; // No primitive here
}
Ast *parse_primitive(Lexer *lexer) {
Ast *primitive = parse_if_primitive(lexer);
if (!primitive) {
parser_error("expected '(', number or var");
}
return primitive;
}
And call the one you need:
Ast *parse_factor(Lexer *lexer) {
Ast *factor = parse_primitive(lexer); // Mandatory
for (;;) {
Ast *primitive = parse_if_primitive(lexer); // Not mandatory
if (!primitive) {
return factor;
}
factor = make_app(factor, primitive);
}
}