compiler-constructionyacclalrtigerml-yacc

How can I resolve this shift-reduce conflict?


I'm following Appel's book Modern Compiler Implementation in ML and am trying to write a grammar for Tiger. Here's my first attempt:

%%
%term
    EOF 
  | ID of string
  | INT of int | STRING of string 
  | COMMA | COLON | SEMICOLON | LPAREN | RPAREN | LBRACK | RBRACK 
  | LBRACE | RBRACE | DOT 
  | PLUS | MINUS | UMINUS | TIMES | DIVIDE | EQ | NEQ | LT | LE | GT | GE
  | AND | OR | ASSIGN
  | ARRAY | IF | THEN | ELSE | WHILE | FOR | TO | DO | LET | IN | END | OF 
  | BREAK | NIL
  | FUNCTION | VAR | TYPE 

%nonterm  exp
        | program
        | lvalue
        | assignment
        | ifthenelse
        | ifthen
        | whileloop
        | forloop
        | seq
        | funcall
        | funargs
        | arith
        | cmp
        | bool
        | let
        | unit
        | exps
        | decs
        | dec
        | tydec
        | vardec
        | fundec
        | ty
        | tyfields
        | tyfield
        | expseq
        | record
        | array
        | fields
        | field

%pos int
%verbose
%start program
%eop EOF
%noshift EOF

%name Tiger

%keyword WHILE FOR TO BREAK LET IN END FUNCTION VAR TYPE ARRAY IF THEN ELSE DO OF NIL

%prefer THEN ELSE LPAREN

%value ID ("bogus")
%value INT (1)
%value STRING ("")

%nonassoc ASSIGN
%left AND OR
%nonassoc EQ NEQ GT LT GE LE
%left PLUS MINUS
%left TIMES DIVIDE
%left UMINUS

%%

program: exp ()
exp:  lvalue ()
    | assignment ()
    | ifthenelse ()
    | ifthen ()
    | whileloop ()
    | forloop ()
    | seq ()
    | INT ()
    | STRING ()
    | funcall ()
    | arith ()
    | cmp ()
    | bool ()
    | let ()
    | LPAREN exp RPAREN ()
    | unit ()
    | NIL ()
    | BREAK ()
    | record ()
    | array ()

unit: LPAREN RPAREN ()

lvalue: ID ()
      | lvalue DOT ID ()
      | lvalue LBRACK exp RBRACK ()
      | ID LBRACK exp RBRACK ()

seq: LPAREN exp SEMICOLON exps RPAREN ()

exps: exp SEMICOLON exps ()
      | exp ()

funcall: ID LPAREN RPAREN ()
      |  ID LPAREN funargs RPAREN ()

funargs: exp COMMA funargs ()
      | exp ()

arith:  exp PLUS exp ()
      | exp MINUS exp ()
      | exp TIMES exp ()
      | exp DIVIDE exp ()
      | MINUS exp %prec UMINUS ()

cmp:    exp EQ exp ()
      | exp NEQ exp ()
      | exp GT exp ()
      | exp LT exp ()
      | exp GE exp ()
      | exp LE exp ()

bool:   exp AND exp ()
      | exp OR exp ()

assignment: lvalue ASSIGN exp ()

ifthenelse: IF exp THEN exp ELSE exp ()

ifthen: IF exp THEN exp ()

whileloop: WHILE exp DO exp ()

forloop: FOR ID ASSIGN exp TO exp DO exp ()

decs: decs dec ()
      | ()

dec: tydec ()
      | vardec ()
      | fundec ()

tydec: TYPE ID EQ ty ()

ty: ID ()
      | LBRACE tyfields RBRACE ()
      | LBRACE RBRACE ()
      | ARRAY OF ID ()

tyfields: tyfield COMMA tyfields ()
      | tyfield ()

tyfield: ID COLON ID ()

vardec: VAR ID ASSIGN exp ()
      | VAR ID COLON ID ASSIGN exp ()

fundec: FUNCTION ID LPAREN tyfields RPAREN EQ exp ()
      | FUNCTION ID LPAREN tyfields COLON ID RPAREN EQ exp ()

let: LET decs IN expseq END ()

expseq: exp SEMICOLON expseq ()
 | ()

record: ID LBRACE fields RBRACE ()

fields: field COMMA fields ()
 | ()

field: ID EQ exp ()

array: ID LBRACK exp RBRACK OF exp ()

This grammar has lots of shift/reduce conflicts relating to all the exp op exp rules. I think that's probably mostly harmless, but there is one conflict in particular that is causing problems:

error:  state 132: shift/reduce conflict (shift OR, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift AND, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift GE, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift GT, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift LE, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift LT, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift NEQ, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift EQ, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift DIVIDE, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift TIMES, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift MINUS, reduce by rule 75)
error:  state 132: shift/reduce conflict (shift PLUS, reduce by rule 75)

state 132:

        arith : exp . PLUS exp
        arith : exp . MINUS exp
        arith : exp . TIMES exp
        arith : exp . DIVIDE exp
        cmp : exp . EQ exp
        cmp : exp . NEQ exp
        cmp : exp . GT exp
        cmp : exp . LT exp
        cmp : exp . GE exp
        cmp : exp . LE exp
        bool : exp . AND exp
        bool : exp . OR exp
        array : ID LBRACK exp RBRACK OF exp .  (reduce by rule 75)

        PLUS    shift 42
        MINUS   shift 41
        TIMES   shift 40
        DIVIDE  shift 39
        EQ      shift 38
        NEQ     shift 37
        LT      shift 36
        LE      shift 35
        GT      shift 34
        GE      shift 33
        AND     shift 32
        OR      shift 31


        .       reduce by rule 75

If you attempt to use the parser generated by ML-Yacc on queens.tig it results in a parse error. Here is the program for reference:

/* A program to solve the 8-queens problem */

let
    var N := 8

    type intArray = array of int

    var row := intArray [ N ] of 0
    var col := intArray [ N ] of 0
    var diag1 := intArray [N+N-1] of 0
    var diag2 := intArray [N+N-1] of 0

    function printboard() =
       (for i := 0 to N-1
     do (for j := 0 to N-1 
          do print(if col[i]=j then " O" else " .");
         print("\n"));
         print("\n"))

    function try(c:int) = 
( /*  for i:= 0 to c do print("."); print("\n"); flush();*/
     if c=N
     then printboard()
     else for r := 0 to N-1
       do if row[r]=0 & diag1[r+c]=0 & diag2[r+7-c]=0
               then (row[r]:=1; diag1[r+c]:=1; diag2[r+7-c]:=1;
                 col[c]:=r;
                     try(c+1);
             row[r]:=0; diag1[r+c]:=0; diag2[r+7-c]:=0)

)
 in try(0)
end

This is the error:

- queens.tig:13.1:syntax error: replacing  FUNCTION with  PLUS
queens.tig:33.1:syntax error found at END

uncaught exception Error
  raised at: parsetest.sml:17.47-17.61

The parser is struggling with this:

    var diag2 := intArray [N+N-1] of 0

    function printboard() =

I don't understand why the parser wants to shift in this case while FUNCTION is the lookahead symbol, because you cannot shift without making a substitution, whereas you can reduce (I thought shift/reduce conflicts were only relevant for ambiguous strings). I'm unsure how to resolve these conflicts because I'm not clear why the parser is taking such a greedy approach and shifting at all costs. I've been stuck on this for hours, help would be greatly appreciated. Am I going about it all wrong?


Solution

  • The error is coming from the fact that your function definition rules:

    tyfields: tyfield COMMA tyfields ()
          | tyfield ()
    
    tyfield: ID COLON ID ()
    
    fundec: FUNCTION ID LPAREN tyfields RPAREN EQ exp ()
          | FUNCTION ID LPAREN tyfields COLON ID RPAREN EQ exp ()
    

    can't match a function declaration with no arguments (just a bare () after the name). So ML-Yacc tries to recover by replacing the FUNCTION with a PLUS and keep going, but that doesn't really help.

    IMO ML-Yacc's "agressive" default error recovery generally hurts more than it helps -- any error recovery in a parser needs to be carefully tuned in order to not cause more confusion when it does something unexpected.


    To go back to the question in the title, the shift/reduce conflicts all come from expression precedence ambiguities. You have precedence declarations on some rules for expressions (from their tokens), but not all of them -- the "statement-like" expressions (ifthen, loops, array, etc) all need precedences as well.