tatsu.exceptions.FailedParse: (52:24) expecting one of: "'" '"' :
declarator = {pointer}? direct_declarator ;
^
I found a C BNF grammar here: https://cs.wmich.edu/~gupta/teaching/cs4850/sumII06/The%20syntax%20of%20C%20in%20Backus-Naur%20form.htm
I adapted so it can work with tatsu, but I'm not sure I did everything right.
The tatsu grammar:
translation_unit = {external_declaration}* ;
external_declaration = function_definition
| declaration ;
function_definition = {declaration_specifier}* declarator {declaration}* compound_statement ;
declaration_specifier = storage_class_specifier
| type_specifier
| type_qualifier ;
storage_class_specifier = "auto"
| "register"
| "static"
| "extern"
| "typedef" ;
type_specifier = "void"
| "char"
| "short"
| "int"
| "long"
| "float"
| "double"
| "signed"
| "unsigned"
| struct_or_union_specifier
| enum_specifier
| typedef_name ;
struct_or_union_specifier = struct_or_union identifier "{" {struct_declaration}+ "}"
| struct_or_union "{" {struct_declaration}+ "}"
| struct_or_union identifier ;
struct_or_union = "struct"
| "union" ;
struct_declaration = {specifier_qualifier}* struct_declarator_list ;
specifier_qualifier = type_specifier
| type_qualifier ;
struct_declarator_list = struct_declarator
| struct_declarator_list "," struct_declarator ;
struct_declarator = declarator
| declarator ":" constant_expression
| ":" constant_expression ;
declarator = {pointer}? direct_declarator ;
pointer = "*" {type_qualifier}* {pointer}? ;
type_qualifier = "const"
| "volatile" ;
direct_declarator = identifier
| "(" declarator ")"
| direct_declarator "[" {constant_expression}? "]"
| direct_declarator "(" parameter_type_list ")"
| direct_declarator "(" {identifier}* ")" ;
constant_expression = conditional_expression ;
conditional_expression = logical_or_expression
| logical_or_expression "?" expression ":" conditional_expression ;
logical_or_expression = logical_and_expression
| logical_or_expression "||" logical_and_expression ;
logical_and_expression = inclusive_or_expression
| logical_and_expression "&&" inclusive_or_expression ;
inclusive_or_expression = exclusive_or_expression
| inclusive_or_expression "|" exclusive_or_expression ;
exclusive_or_expression = and_expression
| exclusive_or_expression "^" and_expression ;
and_expression = equality_expression
| and_expression "&" equality_expression ;
equality_expression = relational_expression
| equality_expression "==" relational_expression
| equality_expression "!=" relational_expression ;
relational_expression = shift_expression
| relational_expression "<" shift_expression
| relational_expression ">" shift_expression
| relational_expression "<=" shift_expression
| relational_expression ">=" shift_expression ;
shift_expression = additive_expression
| shift_expression "<<" additive_expression
| shift_expression ">>" additive_expression ;
additive_expression = multiplicative_expression
| additive_expression "+" multiplicative_expression
| additive_expression "-" multiplicative_expression ;
multiplicative_expression = cast_expression
| multiplicative_expression "*" cast_expression
| multiplicative_expression "/" cast_expression
| multiplicative_expression "%" cast_expression ;
cast_expression = unary_expression
| "(" type_name ")" cast_expression ;
unary_expression = postfix_expression
| "++" unary_expression
| "--" unary_expression
| unary_operator cast_expression
| "sizeof" unary_expression
| "sizeof" type_name ;
postfix_expression = primary_expression
| postfix_expression "[" expression "]"
| postfix_expression "(" {assignment_expression}* ")"
| postfix_expression "." identifier
| postfix_expression "->" identifier
| postfix_expression "++"
| postfix_expression "--" ;
primary_expression = identifier
| constant
| string
| "(" expression ")" ;
constant = integer_constant
| character_constant
| floating_constant
| enumeration_constant ;
expression = assignment_expression
| expression "," assignment_expression ;
assignment_expression = conditional_expression
| unary_expression assignment_operator assignment_expression ;
assignment_operator = "="
| "/="
| "*="
| "%="
| "+="
| "-="
| "<<="
| ">>="
| "&="
| "^="
| "|=" ;
unary_operator = "&"
| "*"
| "+"
| "-"
| "~"
| "!" ;
type_name = {specifier_qualifier}+ {abstract_declarator}? ;
parameter_type_list = parameter_list
| parameter_list "," "..." ;
parameter_list = parameter_declaration
| parameter_list "," parameter_declaration ;
parameter_declaration = {declaration_specifier}+ declarator
| {declaration_specifier}+ abstract_declarator
| {declaration_specifier}+ ;
abstract_declarator = pointer
| pointer direct_abstract_declarator
| direct_abstract_declarator ;
direct_abstract_declarator = "(" abstract_declarator ")"
| {direct_abstract_declarator}? "[" {constant_expression}? "]"
| {direct_abstract_declarator}? "(" {parameter_type_list}? ")" ;
enum_specifier = "enum" identifier "{" enumerator_list "}"
| "enum" "{" enumerator_list "}"
| "enum" identifier ;
enumerator_list = enumerator
| enumerator_list "," enumerator ;
enumerator = identifier
| identifier "=" constant_expression ;
typedef_name = identifier ;
declaration = {declaration_specifier}+ {init_declarator}* ";" ;
init_declarator = declarator
| declarator "=" initializer ;
initializer = assignment_expression
| "{" initializer_list "}"
| "{" initializer_list "," "}" ;
initializer_list = initializer
| initializer_list "," initializer ;
compound_statement = "{" {declaration}* {statement}* "}" ;
statement = labeled_statement
| expression_statement
| compound_statement
| selection_statement
| iteration_statement
| jump_statement ;
labeled_statement = identifier ":" statement
| "case" constant_expression ":" statement
| "default" ":" statement ;
expression_statement = {expression}? ";" ;
selection_statement = "if" "(" expression ")" statement
| "if" "(" expression ")" statement "else" statement
| "switch" "(" expression ")" statement ;
iteration_statement = "while" "(" expression ")" statement
| "do" statement "while" "(" expression ")" ";"
| "for" "(" {expression}? ";" {expression}? ";" {expression}? ")" statement ;
jump_statement = "goto" identifier ";"
| "continue" ";"
| "break" ";"
| "return" {expression}? ";" ;
The C code:
int func(int i, char c) {
float f = 3;
}
The python code:
def main():
import pprint
import json
from tatsu import parse
from tatsu.util import asjson
csample = open('sample.c').read()
gram = open('cbnf_tatsu.txt').read()
ast = parse(gram, csample)
print('PPRINT')
pprint.pprint(ast, indent=2, width=20)
print()
print('JSON')
print(json.dumps(asjson(ast), indent=2))
print()
if __name__ == '__main__':
main()
Tatsu doesn't accept {rulename}?
, it uses [rulename]
instead.