bison flex-lexer bisonc++

flex/bison "line 25: syntax error at ''" on the last line of the file


I'm trying to compile a flex project, but I always get the same error. I have searched all over SO and tried most of the suggested solutions, such as adding a `<<EOF>>` rule that returns 1 or 0, or calling yywrap(), but without luck.

I don't see where the error is. I also get 84 shift/reduce conflict warnings, but I'm not sure whether those could be the problem.

This is the tokens.l:

%option yylineno

%{
 #include <string>
 #include <iostream>
 #include <vector>
 using namespace std ;
 #include "parser.hpp" 

 /* End-of-input hook called by the scanner: a non-zero result means there is
    no further input to switch to. */
 extern "C" int yywrap() {
   return 1;
 }

 /* Debug helper: writes the numeric token code, a space, and the matched
    text wrapped in angle brackets, followed by a newline (flushed). */
 void processtoken (int t, string id) {
   const string tail = " <" + id + ">";
   cout << t << tail << endl;
 }

 /* Hands the current match (yytext, yyleng) to processtoken() together with
    the token code t. The macro only prints; it does not return anything. */
 #define TOKEN(t) processtoken((t), string(yytext, yyleng))

%}

%option nounput

%%

 /* Scanner rules. Every action below goes through TOKEN(), which only prints
    the token code and the matched text.
    NOTE(review): no action in this section contains a `return`, so yylex()
    never hands any of these token codes back to its caller. */

 /* Reserved words. They come before the identifier patterns so that, when a
    keyword and an identifier pattern match the same text, the earlier
    (keyword) rule is the one selected. */
programa                                TOKEN(RPROGRAM);
procedimiento                           TOKEN(RPROC);
variables                               TOKEN(RVAR);
si                                      TOKEN(RIF);
entonces                                TOKEN(RTHEN);
repetir                                 TOKEN(RREPEAT);
hasta                                   TOKEN(RUNTIL);
siempre                                 TOKEN(RALWAYS);
salir                                   TOKEN(RSALIR);
leer                                    TOKEN(RREAD);
escribir_linea                          TOKEN(RWRITE);
in                                      TOKEN(RIN);
out                                     TOKEN(ROUT);
entero                                  TOKEN(RENTERO);
real                                    TOKEN(RREAL);


 /* Identifiers, all reported as TID:
      - a letter followed by alphanumerics, each optionally preceded by a
        single underscore;
      - '$' + letter, then alphanumerics each optionally preceded by one
        hyphen, with an optional trailing hyphen;
      - '$' + letter, then alphanumerics each preceded by up to two
        underscores.
    The last pattern matches "(* ... *)" comments and discards them. */
[a-zA-Z](_?[a-zA-Z0-9])*                TOKEN(TID); 
\$[a-zA-Z](\-{0,1}[a-zA-Z0-9])*\-?      TOKEN(TID);
\$[a-zA-Z](\_{0,2}[a-zA-Z0-9])*         TOKEN(TID);
\(\*([^*]|\*+[^*)])*\*+\)               ; 

 /* Whitespace is skipped. Numeric literals follow.
    NOTE(review): for text such as "1.5" the TDOUBLE and TREAL patterns match
    the same length, so the earlier TDOUBLE rule wins; TREAL is only selected
    when an exponent makes its match longer. */
[ \t\n]                                 ;
[0-9]+\.[0-9]*                          TOKEN(TDOUBLE);
[0-9]+                                  TOKEN(TINTEGER); 
[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?        TOKEN(TREAL);

 /* Operators and punctuation. Two-character operators ("==", ">=", "<=",
    "/=") beat their one-character prefixes because the longest match is
    preferred. "/=" is the not-equal operator (TCNE). */
"="                                     TOKEN(TASSIG);
"=="                                    TOKEN(TEQUAL);
">"                                     TOKEN(TCGT);
">="                                    TOKEN(TCGE);
"<"                                     TOKEN(TCLT);
"<="                                    TOKEN(TCLE);
"+"                                     TOKEN(TPLUS);
"-"                                     TOKEN(TMINUS);
"*"                                     TOKEN(TMUL);
"/"                                     TOKEN(TDIV);
"/="                                    TOKEN(TCNE);
"("                                     TOKEN(TLPAREN);
")"                                     TOKEN(TRPAREN);
"{"                                     TOKEN(TLBRACE);
"}"                                     TOKEN(TRBRACE);
","                                     TOKEN(TCOMMA);
":"                                     TOKEN(TCOLON);
";"                                     TOKEN(TSEMIC);

 /* Catch-all: any other single character is reported as unknown and the
    scanner is terminated. */
.                                       { cout << "Token desconocido: " << yytext << endl; yyterminate();}

%%

And this is the parser.y:

%{
   #include <stdio.h>
   #include <iostream>
   #include <vector>
   #include <string>
   using namespace std; 

   extern int yylex();
   extern int yylineno;
   extern char *yytext;
   string tab = "\t" ;
   /* Error callback invoked by the generated parser. Prints the scanner's
      current line number, the parser's message and the scanner's current
      text buffer on standard output.
      NOTE(review): yytext belongs to the scanner; when the error is raised
      at end of input there is no token text, which is how the report can end
      in ''. */
   void yyerror (const char *msg) {
     fprintf(stdout, "line %d: %s at '%s'\n", yylineno, msg, yytext);
   }


%}

/* Semantic value type: every token and nonterminal declared below carries a
   pointer to a std::string (the <str> member). */
%union {
    string  *str ; 
}

/* Reserved-word tokens. */
%token <str> RPROGRAM RPROC RVAR RIF RTHEN RREPEAT RUNTIL
%token <str> RALWAYS RSALIR RREAD RWRITE RENTERO RREAL
%token <str> RIN ROUT

/* Identifier and numeric-literal tokens. */
%token <str> TID TDOUBLE TREAL TINTEGER

/* Assignment/comparison operators, arithmetic operators, and punctuation. */
%token <str> TASSIG TEQUAL TCGT TCGE TCLT TCLE TCNE
%token <str> TPLUS TMINUS TMUL TDIV
%token <str> TLPAREN TRPAREN TLBRACE TRBRACE TCOMMA TCOLON TSEMIC

/* Nonterminals; all share the <str> value type. */
%type <str> programa
%type <str> declaraciones
%type <str> lista_de_ident
%type <str> resto_lista_id
%type <str> tipo
%type <str> decl_de_subprogs
%type <str> decl_de_subprograma
%type <str> argumentos
%type <str> lista_de_param
%type <str> clase_par
%type <str> resto_lis_de_param
%type <str> lista_de_sentencias
%type <str> sentencia
%type <str> variable
%type <str> expresion

/* The four arithmetic operators are left-associative and, being declared on
   one line, share a single precedence level.
   NOTE(review): the comparison tokens (TEQUAL, TCGT, TCGE, TCLT, TCLE, TCNE)
   have no precedence or associativity declared, so the binary `expresion`
   rules that use them are ambiguous; this looks like the source of the
   reported shift/reduce conflicts. */
%left TPLUS TMINUS TMUL TDIV

/* The parse begins at `programa`. */
%start programa 

%%

/* Whole program: the `programa` keyword and a name, then variable
   declarations, subprogram declarations and a braced statement list. Because
   this rule is the start symbol and begins with RPROGRAM, empty input is not
   accepted. */
programa :  RPROGRAM TID
        declaraciones
        decl_de_subprogs
        TLBRACE lista_de_sentencias TRBRACE
        ;


/* Zero or more declarations of the form `variables id, id... : tipo ;`.
   The `{}` alternative is the empty production with an empty action. */
declaraciones : RVAR lista_de_ident TCOLON tipo TSEMIC declaraciones
        | {}
        ;

/* One or more identifiers separated by commas. */
lista_de_ident : TID resto_lista_id
        ;

/* Tail of an identifier list: further `, id` items, or nothing. */
resto_lista_id : TCOMMA TID resto_lista_id
        | {}
        ;

/* Type names. */
tipo : RENTERO | RREAL
        ;

/* Zero or more subprogram declarations. */
decl_de_subprogs : decl_de_subprograma decl_de_subprogs
        | {}
        ;

/* A procedure: keyword, name, optional argument list, local declarations
   and a braced statement list. */
decl_de_subprograma : RPROC TID argumentos declaraciones
                    TLBRACE lista_de_sentencias TRBRACE
        ;

/* Optional parenthesised parameter list. */
argumentos : TLPAREN lista_de_param TRPAREN
        | {}
        ;

/* First parameter group `ids : class type`, followed by any further groups. */
lista_de_param : lista_de_ident TCOLON clase_par tipo resto_lis_de_param
        ;

/* Parameter passing class: in, out, or in out. */
clase_par : RIN | ROUT | RIN ROUT
        ;

/* Further parameter groups, each introduced by a semicolon, or nothing. */
resto_lis_de_param : TSEMIC lista_de_ident TCOLON clase_par tipo resto_lis_de_param
        | {}
        ;

/* Zero or more statements. */
lista_de_sentencias : sentencia lista_de_sentencias
        | {}
        ;

/* Statements, in order: assignment; conditional; repeat ... until;
   endless repeat; conditional exit; read; write.
   NOTE(review): the conditional-exit alternative (RSALIR RIF expresion) is
   the only simple statement without a trailing TSEMIC, and there is no
   alternative for a procedure call such as `sumar(a,b,c);` -- confirm both
   are intended. */
sentencia : variable TASSIG expresion TSEMIC
        | RIF expresion RTHEN TLBRACE lista_de_sentencias TRBRACE
        | RREPEAT TLBRACE lista_de_sentencias TRBRACE RUNTIL expresion TSEMIC
        | RREPEAT RALWAYS TLBRACE lista_de_sentencias TRBRACE
        | RSALIR RIF expresion
        | RREAD TLPAREN variable TRPAREN TSEMIC
        | RWRITE TLPAREN expresion TRPAREN TSEMIC
        ;

/* An assignable or readable name. */
variable : TID
        ;

/* Expressions: binary comparison and arithmetic operators over identifiers,
   integer literals, real literals and parenthesised sub-expressions.
   NOTE(review): TDOUBLE is declared as a token but is not accepted here as
   an operand. */
expresion : expresion TEQUAL expresion
        | expresion TCGT expresion
        | expresion TCLT expresion
        | expresion TCGE expresion
        | expresion TCLE expresion
        | expresion TCNE expresion
        | expresion TPLUS expresion
        | expresion TMINUS expresion
        | expresion TMUL expresion
        | expresion TDIV expresion
        | TID 
        | TINTEGER
        | TREAL
        | TLPAREN expresion TRPAREN
        ;

And the test program I'm using to exercise all the tokens is the following:

programa ejemplo
    variables a,b,c : entero;
    variables d,e : real;
(* esto es un comentario *)

procedimiento sumar (x,y: in entero; resul: in out entero) 
    variables aux:entero;
    {
        repetir {
            aux=x; 
            resul=y;
            aux = aux - 1;
            resul = resul+1;
        } hasta aux /= 0;
    }

{
    leer(a); leer(b);
    d= 1/b;
    e= 1/a;
    sumar(a,b,c); (* los que hagan llamadas a procedimientos *)
    c= c*(c*d)+e;
    escribir_linea(c*c);
}

I always get this error at the last line: `line 25: syntax error at ''`. I don't know what it means, because I do have a rule for whitespace. An EOF rule doesn't help either: I tried `<<EOF>> {yywrap();}`, but then it doesn't finish.


Solution

  • Nowhere in your scanner definition do you ever return a token to the parser. So the only token the parser will ever see is the END token automatically returned by flex when it sees an EOF.

    But your grammar does not accept an empty input; it insists that the first token in the input be programa (for example). So naturally when it sees the EOF, it will report a syntax error.

    When the EOF is detected, there is no token text, so yytext will not be valid. (It might even be NULL, so it definitely should not be used.) On the whole, it is not really a good idea to use yytext outside of a lexer action, and the particular use of it in yyerror invokes Undefined Behaviour (although you will from time to time see it in bison examples).