Tags: java, parsing, antlr, grammar, antlr3

ANTLR3: No viable alternative at character


I have this ANTLR3 grammar:

grammar wft;

@header {
    package com.mycompany.wftdiff.parser;

    import com.mycompany.wftdiff.model.*;
}
@lexer::header {
    package com.mycompany.wftdiff.parser;
}
@members {
    private final WftFile wftFile = new WftFile();

    public WftFile getParsingResult() {
        return wftFile;
    }
}
// Entry rule: prints a marker, then accepts any mix of CommentLine tokens,
// assignments and NewLine tokens, followed by one itemTypeDefinition and EOF.
// NOTE(review): the CommentLine lexer rule sets $channel = HIDDEN, so with a
// default CommonTokenStream the parser presumably never sees that token here.
wftFile:
    {
        System.out.println("Heyo!");
    }
    (CommentLine | assignment | NewLine)*
    itemTypeDefinition
    EOF
    ;

/**
 * ItemTypeDefinition
 * DEFINE ITEM_TYPE
 * END ITEM_TYPE
 *
 * Parser rule for one DEFINE ITEM_TYPE ... END ITEM_TYPE block that contains
 * exactly one nested DEFINE ITEM_ATTRIBUTE ... END ITEM_ATTRIBUTE block.
 * Each quoted literal (including the embedded space) becomes a single
 * implicit lexer token.
 * NOTE(review): KeyName, TransStmt and BaseStmt are emitted on the HIDDEN
 * channel by their lexer rules, so the parser presumably never receives them;
 * the (...)+ loop can then only be satisfied by NewLine.
 */
itemTypeDefinition:
    'DEFINE ITEM_TYPE' NewLine
    (KeyName|TransStmt|BaseStmt|NewLine)+
        WhiteSpace* 'DEFINE ITEM_ATTRIBUTE' NewLine
        (KeyName|TransStmt|BaseStmt)*
        WhiteSpace* 'END ITEM_ATTRIBUTE' NewLine
    'END ITEM_TYPE'
    ;

/**
 * KeyName
 * KEY NAME VARCHAR2(8)
 *
 * Lexer rule: optional leading spaces, the fixed KeyNameStart prefix, then any
 * characters up to a NewLine (`.*` is non-greedy in ANTLR 3). The whole line,
 * including the line break, is one token placed on the HIDDEN channel.
 */
KeyName: WhiteSpace* KeyNameStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment KeyNameStart: 'KEY NAME VARCHAR2(';

/**
 * TransStmt
 * TRANS DISPLAY_NAME VARCHAR2(80)
 *
 * Lexer rule: optional leading spaces, the word TRANS, then the rest of the
 * line including its NewLine. Placed on the HIDDEN channel.
 */
TransStmt: WhiteSpace* TransStmtStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment TransStmtStart: 'TRANS';

/**
 * BaseStmt
 * BASE PROTECT_LEVEL NUMBER
 *
 * Lexer rule: optional leading spaces, the word BASE, then the rest of the
 * line including its NewLine. Placed on the HIDDEN channel.
 */
BaseStmt: WhiteSpace* BaseStmtStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment BaseStmtStart: 'BASE';

/**
 * Assignment
 *
 * Parser rule for one line of the form  TARGET = "value"  terminated by NewLine.
 * Exactly one WhiteSpace token (a single space) is required on each side of '='.
 * Prints a marker on entry, then builds an Assignment from the text of the two
 * labelled tokens, returns it, and registers a second, separately constructed
 * Assignment with wftFile.
 * The String token text includes the surrounding double quotes, because the
 * String lexer rule matches them as part of the token.
 */
assignment returns [Assignment assignment]:
    {
        System.out.println("Assignment found!");
    }
    target=AssignmentTarget
    WhiteSpace '=' WhiteSpace
    value=String {
        assignment = new Assignment(target.getText(), value.getText());
        wftFile.addAssignment(new Assignment(target.getText(), value.getText()));
    }
    NewLine;

// Identifier on the left of '=': one letter followed by letters, digits or '_'.
AssignmentTarget: A (A|D|'_')*;
// Double-quoted text with no embedded '"'; no escape sequences are recognised.
String: '"' ~'"'* '"'
;

/**
 * Comment
 *
 * Lexer rule: '#', then the rest of the line including its NewLine.
 * Placed on the HIDDEN channel.
 */
CommentLine: CommentStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment CommentStart: '#';

// Lexer rules

// Single decimal digit (fragment, used by AssignmentTarget and StringLength).
fragment D: '0'..'9';
// Single ASCII letter, either case (fragment, used by AssignmentTarget).
fragment A: 'A'..'Z'
    | 'a'..'z';
// One or more digits. Not referenced by any rule visible in this grammar.
StringLength: D+;
// Line break: CR LF, LF alone, or CR alone.
NewLine   : '\r' '\n' | '\n' | '\r';
// Exactly one space character; tabs are not covered by any rule in this grammar.
WhiteSpace: ' ';

Then I generate a parser for it using

java -cp "D:\wftdiff\lib\antlr-3.5.2\antlr-3.5.2-complete.jar" org.antlr.Tool -o src/com/mycompany/wftdiff/parser/ grammar-src/wft.g

...and call it like this:

// Build the generated lexer over the contents of the file named by fileName.
val lexer = wftLexer(ANTLRFileStream(fileName))
// Feed the lexer's tokens to the generated parser through a buffered token stream.
val parser = wftParser(CommonTokenStream(lexer))
// Invoke the grammar's start rule; ANTLR reports syntax errors on the console.
parser.wftFile()
// Marker printed after parsing returns (appears as "Test" in the output below).
System.out.println("Test")

fileName points to a text file with the following contents:

# Oracle Workflow Process Definition
# $Header$

VERSION_MAJOR = "2"
VERSION_MINOR = "6"
LANGUAGE = "GERMAN"

ACCESS_LEVEL = "100"

DEFINE ITEM_TYPE
  KEY NAME VARCHAR2(8)
  TRANS DISPLAY_NAME VARCHAR2(80)
  TRANS DESCRIPTION VARCHAR2(240)
  BASE PROTECT_LEVEL NUMBER
  BASE CUSTOM_LEVEL NUMBER
  BASE WF_SELECTOR VARCHAR2(240)
  BASE READ_ROLE REFERENCES ROLE
  BASE WRITE_ROLE REFERENCES ROLE
  BASE EXECUTE_ROLE REFERENCES ROLE
  BASE PERSISTENCE_TYPE VARCHAR2(8)
  BASE PERSISTENCE_DAYS NUMBER

  DEFINE ITEM_ATTRIBUTE
    KEY NAME VARCHAR2(30)
    TRANS DISPLAY_NAME VARCHAR2(80)
    TRANS DESCRIPTION VARCHAR2(240)
    BASE PROTECT_LEVEL NUMBER
    BASE CUSTOM_LEVEL NUMBER
    BASE TYPE VARCHAR2(8)
    BASE FORMAT VARCHAR2(240)
    BASE VALUE_TYPE VARCHAR2(8)
    BASE DEFAULT VARCHAR2(4000)
  END ITEM_ATTRIBUTE
END ITEM_TYPE

I get the following output:

Heyo!
Assignment found!
Assignment found!
Assignment found!
Assignment found!
test-data/partialSample01.wft line 25:2 no viable alternative at character 'D'
test-data/partialSample01.wft line 35:2 no viable alternative at character 'E'
Test

How should I change my grammar in order to get rid of the no viable alternative at character 'D' error?

Note that I don't need to parse this section of the file (I'm not interested in this particular information; it comes later in the file).

Update 1

I tried to ignore the whole thing as suggested here (using skip()), but it didn't help.

New grammar file:

grammar wft;

@header {
    package com.mycompany.wftdiff.parser;

    import com.mycompany.wftdiff.model.*;
}
@lexer::header {
    package com.mycompany.wftdiff.parser;
}
@members {
    private final WftFile wftFile = new WftFile();

    public WftFile getParsingResult() {
        return wftFile;
    }
}
// Entry rule: prints a marker, then accepts any mix of CommentLine tokens,
// assignments and NewLine tokens, followed by one itemTypeDefinition and EOF.
// NOTE(review): the CommentLine lexer rule sets $channel = HIDDEN, so with a
// default CommonTokenStream the parser presumably never sees that token here.
wftFile:
    {
        System.out.println("Heyo!");
    }
    (CommentLine | assignment | NewLine)*
    itemTypeDefinition
    EOF
    ;

/**
 * ItemTypeDefinition
 * DEFINE ITEM_TYPE
 * END ITEM_TYPE
 *
 * Parser rule for one DEFINE ITEM_TYPE ... END ITEM_TYPE block in which the
 * nested item-attribute section is expected as a single DefineItemAttribute
 * token.
 * NOTE(review): KeyName, TransStmt and BaseStmt are emitted on the HIDDEN
 * channel by their lexer rules, so the parser presumably never receives them.
 */
itemTypeDefinition:
    'DEFINE ITEM_TYPE' NewLine
    (KeyName|TransStmt|BaseStmt|NewLine)+
        WhiteSpace*
        NewLine
        DefineItemAttribute
        WhiteSpace*
    'END ITEM_TYPE'
    ;

// Lexer rule: everything from 'DEFINE ITEM_ATTRIBUTE' through 'END ITEM_ATTRIBUTE'
// as one token, which skip() then discards so that no token is emitted.
// NOTE(review): itemTypeDefinition still requires this token, which a skipped
// token can never satisfy; this looks consistent with the reported
// "missing DefineItemAttribute" message.
DefineItemAttribute: 'DEFINE ITEM_ATTRIBUTE' .* 'END ITEM_ATTRIBUTE' {skip();};

/**
 * KeyName
 * KEY NAME VARCHAR2(8)
 *
 * Lexer rule: optional leading spaces, the fixed KeyNameStart prefix, then any
 * characters up to a NewLine (`.*` is non-greedy in ANTLR 3). The whole line,
 * including the line break, is one token placed on the HIDDEN channel.
 */
KeyName: WhiteSpace* KeyNameStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment KeyNameStart: 'KEY NAME VARCHAR2(';

/**
 * TransStmt
 * TRANS DISPLAY_NAME VARCHAR2(80)
 *
 * Lexer rule: optional leading spaces, the word TRANS, then the rest of the
 * line including its NewLine. Placed on the HIDDEN channel.
 */
TransStmt: WhiteSpace* TransStmtStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment TransStmtStart: 'TRANS';

/**
 * BaseStmt
 * BASE PROTECT_LEVEL NUMBER
 *
 * Lexer rule: optional leading spaces, the word BASE, then the rest of the
 * line including its NewLine. Placed on the HIDDEN channel.
 */
BaseStmt: WhiteSpace* BaseStmtStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment BaseStmtStart: 'BASE';

/**
 * Assignment
 *
 * Parser rule for one line of the form  TARGET = "value"  terminated by NewLine.
 * Exactly one WhiteSpace token (a single space) is required on each side of '='.
 * Prints a marker on entry, then builds an Assignment from the text of the two
 * labelled tokens, returns it, and registers a second, separately constructed
 * Assignment with wftFile.
 * The String token text includes the surrounding double quotes, because the
 * String lexer rule matches them as part of the token.
 */
assignment returns [Assignment assignment]:
    {
        System.out.println("Assignment found!");
    }
    target=AssignmentTarget
    WhiteSpace '=' WhiteSpace
    value=String {
        assignment = new Assignment(target.getText(), value.getText());
        wftFile.addAssignment(new Assignment(target.getText(), value.getText()));
    }
    NewLine;

// Identifier on the left of '=': one letter followed by letters, digits or '_'.
AssignmentTarget: A (A|D|'_')*;
// Double-quoted text with no embedded '"'; no escape sequences are recognised.
String: '"' ~'"'* '"'
;

/**
 * Comment
 *
 * Lexer rule: '#', then the rest of the line including its NewLine.
 * Placed on the HIDDEN channel.
 */
CommentLine: CommentStart .* {$channel = HIDDEN;} NewLine;
// Fragment: helper for other lexer rules only; never emitted as its own token.
fragment CommentStart: '#';

// Lexer rules

// Single decimal digit (fragment, used by AssignmentTarget and StringLength).
fragment D: '0'..'9';
// Single ASCII letter, either case (fragment, used by AssignmentTarget).
fragment A: 'A'..'Z'
    | 'a'..'z';
// One or more digits. Not referenced by any rule visible in this grammar.
StringLength: D+;
// Line break: CR LF, LF alone, or CR alone.
NewLine   : '\r' '\n' | '\n' | '\r';
// Exactly one space character; tabs are not covered by any rule in this grammar.
WhiteSpace: ' ';

Parsing result:

Heyo!
Assignment found!
Assignment found!
Assignment found!
Assignment found!
test-data/partialSample01.wft line 25:2 no viable alternative at character 'D'
test-data/partialSample01.wft line 36:0 missing DefineItemAttribute at 'END ITEM_TYPE'
Test

Update 2

The remaining problems I am trying to solve:

  1. Create a parser that is capable of recognizing all parts of this file that are marked as relevant in the comments, that is:

1.1. everything between the BEGIN ACTIVITY and END ACTIVITY tags, 1.2. everything between the BEGIN ACTIVITY_TRANSITION and END ACTIVITY_TRANSITION tags, 1.3. everything between the BEGIN PROCESS_ACTIVITY and END PROCESS_ACTIVITY tags.

By "recognize everything" I mean there must be ANTLR 3 code, which allows me to put Java statements that would process the data extracted from the file like in the assignment rule in the original post. I don't need answers to supply any Java code there, I will add that code later.

All parts which are not marked as relevant can be ignored by the parser (similar to the comments in the original grammar).

  1. Your grammar must be compatible with ANTLR 3, Java 8, and Windows 7.

  2. You can remove the code in the original version (like here), so you don't get compiler errors.

  3. It must be possible to generate the parser using java -cp "D:\wftdiff\lib\antlr-3.5.2\antlr-3.5.2-complete.jar" org.antlr.Tool -o src/com/mycompany/wftdiff/parser/ grammar-src/wft.g; if you use any special settings, you need to specify them in your answer. The point is, I need to be able to reproduce your result.

  4. When I feed the sample file to the parser, it must consume it without complaining (without printing any ANTLR error messages, without crashing and without throwing technical exceptions like NullPointerException).


Solution

  • Here is the grammar. It recognizes all parts, and you can add Java actions wherever you want.

    Compiled and tested with jdk1.8, antlr 3.5.2 and the provided sample input.

    grammar wft;
    
    @header {
        package com.mycompany.wftdiff.parser;
    }
    
    @lexer::header {
        package com.mycompany.wftdiff.parser;
    }
    
    @members {
    }
    
    // Entry rule: any sequence of comments, top-level assignments, DEFINE blocks
    // and BEGIN/END flow blocks. The trailing EOF forces the parser to consume
    // the whole input; without it the loop simply exits at the first token that
    // starts none of the alternatives and the rest of the file is silently
    // ignored instead of being reported as a syntax error.
    wftFile :   (COMMENT|assignment|definition|flow)* EOF
        ;
    
    // One setting of the form  NAME = "value".
    // The labels give embedded Java actions direct access to both tokens
    // (e.g. $name.text and $value.text).
    assignment
        :   name=ID EQ value=STRING
        ;
    
    // A DEFINE <name> ... END <name> block. Its body is any mix of comments,
    // field declarations (class keyword, field name, type) and nested DEFINE
    // blocks. The rule does not check that the name after END equals the name
    // after DEFINE.
    definition
        :   'DEFINE' ID
            (   COMMENT
            |   dclass ID type
            |   definition
            )*
            'END' ID
        ;
    
    
    // Keyword that opens a field declaration inside a DEFINE block.
    dclass
        :   'KEY'
        |   'BASE'
        |   'TRANS'
        ;

    // Type of a declared field; each alternative has its own rule so that an
    // action can be attached per type.
    type
        :   tnum
        |   tvarchar
        |   tref
        |   tdate
        ;

    // NUMBER
    tnum
        :   'NUMBER'
        ;

    // VARCHAR2(<length>)
    tvarchar
        :   'VARCHAR2' '(' INT ')'
        ;

    // REFERENCES <name>
    tref
        :   'REFERENCES' ID
        ;

    // DATE
    tdate
        :   'DATE'
        ;
    
    // A BEGIN <name> "key" ["key" ...] ... END <name> block. The body needs at
    // least one comment, assignment or nested BEGIN block. The rule does not
    // check that the name after END equals the name after BEGIN.
    flow
        :   'BEGIN' ID STRING+
            (   COMMENT
            |   assignment
            |   flow
            )+
            'END' ID
        ;
    
    // The '=' between an assignment's name and its value.
    EQ
        :   '='
        ;

    // Identifier: a letter or underscore, then letters, digits or underscores.
    ID
        :   ( 'a'..'z' | 'A'..'Z' | '_' )
            ( 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' )*
        ;
    
    
    // Line break, kept off the parser's channel. Accepts CR LF, LF alone and
    // also CR alone; a bare '\r' matched no rule before and produced a
    // "no viable alternative at character" lexer error.
    NL  :   ( '\r' '\n'? | '\n' ) {$channel=HIDDEN;}
        ;
    
    // '#' comment running to the end of the line, kept off the parser's channel.
    // The line terminator is deliberately NOT part of this token: it is left to
    // the NL rule. Requiring '\r'? '\n' here made a comment on the last line of
    // a file that has no trailing newline fail to lex.
    COMMENT
        :   '#' ~('\n'|'\r')* {$channel=HIDDEN;}
        ;
    
    // A single space or tab, kept off the parser's channel.
    WS
        :   ( ' ' | '\t' ) {$channel=HIDDEN;}
        ;
    
    // Double-quoted string literal. The body is any run of escape sequences and
    // characters other than backslash and double quote. The token text includes
    // the surrounding quotes.
    STRING
        :  '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
        ;
    
    // Fragment: one hexadecimal digit, either case. Used by UNICODE_ESC.
    fragment
    HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
    
    // Fragment: a backslash escape inside STRING. This is either a
    // single-character escape (b t n f r " ' \), a unicode escape or an octal
    // escape.
    fragment
    ESC_SEQ
        :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
        |   UNICODE_ESC
        |   OCTAL_ESC
        ;
    
    // Fragment: backslash followed by one to three octal digits; the
    // three-digit form must start with 0..3.
    fragment
    OCTAL_ESC
        :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
        |   '\\' ('0'..'7') ('0'..'7')
        |   '\\' ('0'..'7')
        ;
    
    // Fragment: backslash, 'u', then exactly four hexadecimal digits.
    fragment
    UNICODE_ESC
        :   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
        ;
    
    // Unsigned integer literal: one or more decimal digits. Used by tvarchar.
    INT :   '0'..'9'+
        ;