cparsingrecursiongrammarrecursive-descent

Accepting an Equal Sign using Recursive Descent Parsing in C


I am currently trying to build a recursive descent parser that parses assignment statements such as a = 4 + b. The grammar looks as follows

<stmt> → id = <expr>
<expr> → <term> {(+ | -) <term>}
<term> → <factor> {(* | /) <factor>}
<factor> → id | int_constant | ( <expr> )

This grammar should accept statements such as sum = sum + 100 * (a - b)

ddddd
djdjdjd
// THIS IS ALL TEXTBOOK CODE

/* front.c - a lexical analyzer system for simple
arithmetic expressions */
#include <stdio.h>
#include <ctype.h>
/* Global declarations */
/* Variables */
int charClass;
char lexeme [100];
char nextChar;
int lexLen;
int token;
int nextToken;
FILE *in_fp, *fopen();

/* Function declarations */
void addChar();
void getChar();
void getNonBlank();
int lex();

/* Character classes */
#define LETTER 0
#define DIGIT 1
#define UNKNOWN 99

/* Token codes */
#define INT_LIT 10
#define IDENT 11
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26


// function prototypes for just expr()
void expr();
void stmt();

// define the grammar
/*
<expr> → <term> {(+ | -) <term>}
<term> → <factor> {(* | /) <factor>}
<factor> → id | int_constant | ( <expr> )
*/
/******************************************************/
/* main driver */
int main() {
/* Open the input data file and process its contents */
  if ((in_fp = fopen("front.in", "r")) == NULL)
    // open the file
    printf("ERROR - cannot open front.in \n");
  else {
    // get characters and lex()?
    getChar();
    do {
      printf("[FROM main()] before lex() %d \n",nextToken);

      lex();

      printf("[FROM main()] %d \n\n", nextToken);

      stmt();
    } while (nextToken != EOF);
  }

  // seems like the tokenizer is already implemented for us 
-> will need to parse as tokens come in


}

void error(){
  printf("ERROR: \n");
}

// the stmt function
void stmt() {
  printf("Enter <stmt>\n\n");

  if (nextToken == IDENT){
    printf("[FROM stmt()] found identifier. \n");
    printf("[FROM stmt()] before lex() %d \n", nextChar);
    lex(); //error in here
    printf("[FROM stmt()] after lex() %d \n\n", nextToken);
  }

  //printf("[FROM stmt()] nxt token %d\n\n", nextToken);
}
void factor(){
  // must choose between two RHSs
  printf("Enter <factor>\n");

  // determine the RHSs
  if(nextToken == IDENT || nextToken == INT_LIT){
    lex(); // get next token
  }
  // if the RHS is <expr> tell the lex to pass over the 
left parenthesis and call expr and then check for right 
parentheses
  else {
    if(nextToken == LEFT_PAREN){
      lex();
      expr();
      if(nextToken == RIGHT_PAREN){
        lex();
      }else {
        error(); // should exist at the very top
      }
    }
    else {
      // it wasnt an id or int or left parentheses
      error();
    }
  }

  printf("Exit <factor>\n");
}

// parse term  
void term() {
  printf("Enter <term>\n");

  // parse the first factor
  factor();

  // as long as the next is * or / get the next token and parse the next factor
  while (nextToken == MULT_OP || nextToken == DIV_OP){
    lex(); // get the next token
    factor(); // parse the next factor
  }
  printf("Enter <expr> \n"); 
}

// parse expression
void expr() {
  printf("Enter <expr> \n");

  // parse first term
  term();

  // as long as the next token is + or -, get the next token and parse the next term
  while(nextToken == ADD_OP || nextToken == SUB_OP){
    // updates next token
    lex();

    // proocesses the term
    term();
  }
  printf("Exit <expr> \n");
}



/*****************************************************/
/* lookup - a function to lookup operators and parentheses
 and return the token */
int lookup(char ch) {
  switch (ch) {
    case '(':
      addChar();
      nextToken = LEFT_PAREN;
      break;
    case ')':
      addChar();
      nextToken = RIGHT_PAREN;
      break;
    case '+':
      addChar();
      nextToken = ADD_OP;
      break;
    case '-':
      addChar();
      nextToken = SUB_OP;
      break;
    case '*':
      addChar();
      nextToken = MULT_OP;
      break;
    case '/':
      addChar();
      nextToken = DIV_OP;
      break;
    case '=':
    // if the equal sign then set as assign op -> not sure 
  what add char does
      printf("[FROM lookup()] found the = \n");
      addChar();
      nextToken = ASSIGN_OP;

    default:
      addChar();
      nextToken = EOF;
      break;
  }
  return nextToken;
}
/*****************************************************/
/* addChar - a function to add nextChar to lexeme */
void addChar() {
  if (lexLen <= 98) {
    lexeme[lexLen++] = nextChar;
    lexeme[lexLen] = 0;
  }
  else
    printf("Error - lexeme is too long \n");
}
/*****************************************************/
/* getChar - a function to get the next character of
 input and determine its character class */
void getChar() {
  if ((nextChar = getc(in_fp)) != EOF){
    //printf("\n[FROM getChar()] nextChar = %c \n\n",nextChar);
  if (isalpha(nextChar))
     charClass = LETTER;
     else if (isdigit(nextChar))
      charClass = DIGIT;
  else charClass = UNKNOWN;
 }
 else
 charClass = EOF;
}
/*****************************************************/
/* getNonBlank - a function to call getChar until it
 returns a non-whitespace character */
void getNonBlank() {
  while (isspace(nextChar))
    getChar();
}
/*****************************************************/
/* lex - a simple lexical analyzer for arithmetic
 expressions */
int lex() {
 lexLen = 0;
 getNonBlank();
 switch (charClass) {

/* Parse identifiers */
 case LETTER:
  addChar();
  getChar();

  while (charClass == LETTER || charClass == DIGIT) {
    addChar();
    getChar();
  }

  nextToken = IDENT;
  break;

/* Parse integer literals */
 case DIGIT:
  addChar();
  getChar();

  while (charClass == DIGIT) {
    addChar();
    getChar();
  }

  nextToken = INT_LIT;
  break;

/* Parentheses and operators */
 case UNKNOWN:
  printf("[FROM lex()] %c\n",nextChar);
  lookup(nextChar);
  getChar();
  break;

/* EOF */
 case EOF:
  nextToken = EOF;
  lexeme[0] = 'E';
  lexeme[1] = 'O';
  lexeme[2] = 'F';
  lexeme[3] = 0;
  break;
 } /* End of switch */

 printf("Next token is: %d, Next lexeme is %s\n", nextToken, lexeme);


 return nextToken;
} /* End of function lex */

The input of this code (in the file front.in) is:

myVar = 5

and the output I am getting is:

[FROM main()] before lex() 0 
Next token is: 11, Next lexeme is myVar
[FROM main()] 11 

Enter <stmt>

[FROM stmt()] found identifier. 
[FROM stmt()] before lex() 32 
[FROM lex()] =
[FROM lookup()] found the = 
Next token is: -1, Next lexeme is ==
[FROM stmt()] after lex() -1

Th expected output should display all the statements such as Enter <stmt> and Exit <stmt> etc for each non terminal in the grammar.

The main problem (that I see) is that it defines the next lexeme as '==' instead of '=', so the parser does not recognize it.

Any ideas as to why this happens?


Solution

  • The case = of the switch in lookup(char ch) has no break. You might try and add one.