parsing lexer bnf ebnf tatsu

tatsu.exceptions.FailedParse while using a C BNF grammar adapted to Tatsu


tatsu.exceptions.FailedParse: (52:24) expecting one of: "'" '"' :
declarator = {pointer}? direct_declarator ;
                       ^

I found a C BNF grammar here: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm

I adapted it so that it works with TatSu, but I'm not sure I did everything right.

The tatsu grammar:

translation_unit = {external_declaration}* ;

external_declaration = function_definition
                         | declaration ;

function_definition = {declaration_specifier}* declarator {declaration}* compound_statement ;

declaration_specifier = storage_class_specifier
                          | type_specifier
                          | type_qualifier ;

storage_class_specifier = "auto"
                            | "register"
                            | "static"
                            | "extern"
                            | "typedef" ;

type_specifier = "void"
                   | "char"
                   | "short"
                   | "int"
                   | "long"
                   | "float"
                   | "double"
                   | "signed"
                   | "unsigned"
                   | struct_or_union_specifier
                   | enum_specifier
                   | typedef_name ;

struct_or_union_specifier = struct_or_union identifier "{" {struct_declaration}+ "}"
                              | struct_or_union "{" {struct_declaration}+ "}"
                              | struct_or_union identifier ;

struct_or_union = "struct"
                    | "union" ;

struct_declaration = {specifier_qualifier}* struct_declarator_list ;

specifier_qualifier = type_specifier
                        | type_qualifier ;

struct_declarator_list = struct_declarator
                           | struct_declarator_list "," struct_declarator ;

struct_declarator = declarator
                      | declarator ":" constant_expression
                      | ":" constant_expression ;

declarator = {pointer}? direct_declarator ;

pointer = "*" {type_qualifier}* {pointer}? ;

type_qualifier = "const"
                   | "volatile" ;

direct_declarator = identifier
                      | "(" declarator ")"
                      | direct_declarator "[" {constant_expression}? "]"
                      | direct_declarator "(" parameter_type_list ")"
                      | direct_declarator "(" {identifier}* ")" ;

constant_expression = conditional_expression ;

conditional_expression = logical_or_expression
                           | logical_or_expression "?" expression ":" conditional_expression ;

logical_or_expression = logical_and_expression
                          | logical_or_expression "||" logical_and_expression ;

logical_and_expression = inclusive_or_expression
                           | logical_and_expression "&&" inclusive_or_expression ;

inclusive_or_expression = exclusive_or_expression
                            | inclusive_or_expression "|" exclusive_or_expression ;

exclusive_or_expression = and_expression
                            | exclusive_or_expression "^" and_expression ;

and_expression = equality_expression
                   | and_expression "&" equality_expression ;

equality_expression = relational_expression
                        | equality_expression "==" relational_expression
                        | equality_expression "!=" relational_expression ;

relational_expression = shift_expression
                          | relational_expression "<" shift_expression
                          | relational_expression ">" shift_expression
                          | relational_expression "<=" shift_expression
                          | relational_expression ">=" shift_expression ;

shift_expression = additive_expression
                     | shift_expression "<<" additive_expression
                     | shift_expression ">>" additive_expression ;

additive_expression = multiplicative_expression
                        | additive_expression "+" multiplicative_expression
                        | additive_expression "-" multiplicative_expression ;

multiplicative_expression = cast_expression
                              | multiplicative_expression "*" cast_expression
                              | multiplicative_expression "/" cast_expression
                              | multiplicative_expression "%" cast_expression ;

cast_expression = unary_expression
                    | "(" type_name ")" cast_expression ;

unary_expression = postfix_expression
                     | "++" unary_expression
                     | "--" unary_expression
                     | unary_operator cast_expression
                     | "sizeof" unary_expression
                     | "sizeof" type_name ;

postfix_expression = primary_expression
                       | postfix_expression "[" expression "]"
                       | postfix_expression "(" {assignment_expression}* ")"
                       | postfix_expression "." identifier
                       | postfix_expression "->" identifier
                       | postfix_expression "++"
                       | postfix_expression "--" ;

primary_expression = identifier
                       | constant
                       | string
                       | "(" expression ")" ;

constant = integer_constant
             | character_constant
             | floating_constant
             | enumeration_constant ;

expression = assignment_expression
               | expression "," assignment_expression ;

assignment_expression = conditional_expression
                          | unary_expression assignment_operator assignment_expression ;

assignment_operator = "="
                        | "/="
                        | "*="
                        | "%="
                        | "+="
                        | "-="
                        | "<<="
                        | ">>="
                        | "&="
                        | "^="
                        | "|=" ;

unary_operator = "&"
                   | "*"
                   | "+"
                   | "-"
                   | "~"
                   | "!" ;

type_name = {specifier_qualifier}+ {abstract_declarator}? ;

parameter_type_list = parameter_list
                        | parameter_list "," "..." ;

parameter_list = parameter_declaration
                   | parameter_list "," parameter_declaration ;

parameter_declaration = {declaration_specifier}+ declarator
                          | {declaration_specifier}+ abstract_declarator
                          | {declaration_specifier}+ ;

abstract_declarator = pointer
                        | pointer direct_abstract_declarator
                        | direct_abstract_declarator ;

direct_abstract_declarator =  "(" abstract_declarator ")"
                               | {direct_abstract_declarator}? "[" {constant_expression}? "]"
                               | {direct_abstract_declarator}? "(" {parameter_type_list}? ")" ;

enum_specifier = "enum" identifier "{" enumerator_list "}"
                   | "enum" "{" enumerator_list "}"
                   | "enum" identifier ;

enumerator_list = enumerator
                    | enumerator_list "," enumerator ;

enumerator = identifier
               | identifier "=" constant_expression ;

typedef_name = identifier ;

declaration =  {declaration_specifier}+ {init_declarator}* ";" ;

init_declarator = declarator
                    | declarator "=" initializer ;

initializer = assignment_expression
                | "{" initializer_list "}"
                | "{" initializer_list "," "}" ;

initializer_list = initializer
                     | initializer_list "," initializer ;

compound_statement = "{" {declaration}* {statement}* "}" ;

statement = labeled_statement
              | expression_statement
              | compound_statement
              | selection_statement
              | iteration_statement
              | jump_statement ;

labeled_statement = identifier ":" statement
                      | "case" constant_expression ":" statement
                      | "default" ":" statement ;

expression_statement = {expression}? ";" ;

selection_statement = "if" "(" expression ")" statement
                        | "if" "(" expression ")" statement "else" statement
                        | "switch" "(" expression ")" statement ;

iteration_statement = "while" "(" expression ")" statement
                        | "do" statement "while" "(" expression ")" ";"
                        | "for" "(" {expression}? ";" {expression}? ";" {expression}? ")" statement ;

jump_statement = "goto" identifier ";"
                   | "continue" ";"
                   | "break" ";"
                   | "return" {expression}? ";" ;

The C code:

int func(int i, char c) {
    float f = 3;
}

The python code:

def main():
    """Parse ``sample.c`` with the grammar in ``cbnf_tatsu.txt`` and print the AST.

    The resulting AST is printed twice: first with ``pprint`` and then as
    JSON (after conversion with ``tatsu.util.asjson``).
    """
    import pprint
    import json
    from tatsu import parse
    from tatsu.util import asjson

    # Context managers guarantee both files are closed, even if a read fails.
    with open('sample.c') as source_file:
        csample = source_file.read()
    with open('cbnf_tatsu.txt') as grammar_file:
        gram = grammar_file.read()

    ast = parse(gram, csample)
    print('PPRINT')
    pprint.pprint(ast, indent=2, width=20)
    print()

    print('JSON')
    print(json.dumps(asjson(ast), indent=2))
    print()


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Solution

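  • TatSu does not accept {rulename}? as the syntax for an optional element; it uses [rulename] instead. Every {x}? in the grammar (for example {pointer}? in the declarator rule) therefore has to be rewritten as [x].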