pythonantlr4

Antlr Python error processing simple PLSQL


Antlr4 was installed on Ubuntu 22.04 with Python as follows:

wget https://github.com/antlr/grammars-v4/blob/master/sql/plsql/Python3/PlSqlLexerBase.py
wget https://github.com/antlr/grammars-v4/blob/master/sql/plsql/Python3/PlSqlParserBase.py

wget https://github.com/antlr/grammars-v4/blob/master/sql/plsql/PlSqlLexer.g4
wget https://github.com/antlr/grammars-v4/blob/master/sql/plsql/PlSqlParser.g4

Install Antlr4 for Python

pip3 install antlr4-python3-runtime==4.13.1

The following test script is used to parse simple SQL and PL/SQL files:

def main():
    """Parse the PL/SQL file named on the command line and print its parse tree.

    The path is taken from ``sys.argv[1]``. The file is read fully into
    memory, lexed and parsed starting at the ``sql_script`` rule, and the
    resulting tree is handed to ``traverse`` for printing.
    """
    source_path = sys.argv[1]
    with open(source_path, 'r') as handle:
        source_text = handle.read()

    # Build the usual ANTLR pipeline: characters -> tokens -> parser.
    char_stream = InputStream(source_text)
    token_stream = CommonTokenStream(PlSqlLexer(char_stream))
    plsql_parser = PlSqlParser(token_stream)

    parse_tree = plsql_parser.sql_script()
    traverse(parse_tree, plsql_parser.ruleNames)

def traverse(tree, rule_names, indent=0):
    """Print an indented outline of a parse tree, one node per line.

    Terminal nodes are printed as ``TOKEN='<text>'``; rule nodes are printed
    as their rule name, followed by their children one level deeper. Any
    node whose text is exactly ``"<EOF>"`` is skipped.

    Args:
        tree: Parse-tree node exposing ``getText()``; rule nodes must also
            expose ``getRuleIndex()`` and ``children``.
        rule_names: Sequence mapping a rule index to its name (the parser's
            ``ruleNames``).
        indent: Current nesting depth; each level adds two spaces.
    """
    # Fetch the text once: it decides the EOF skip and is the terminal label.
    text = tree.getText()
    if text == "<EOF>":
        return

    padding = "  " * indent
    if isinstance(tree, TerminalNodeImpl):
        print("{0}TOKEN='{1}'".format(padding, text))
        return

    print("{0}{1}".format(padding, rule_names[tree.getRuleIndex()]))
    # The ANTLR Python runtime leaves ``children`` as None on a rule context
    # that has no children (empty match or error recovery), so fall back to
    # an empty sequence instead of failing with a TypeError.
    for child in tree.children or ():
        traverse(child, rule_names, indent + 1)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

I test the script with the following simple input file, which it processes without errors:

DECLARE
    l_x NUMBER;
BEGIN
    SELECT length(c1)
    INTO l_x
    FROM the_table
    WHERE c2 = 'X';
END;

Which gives:

python3 ./runPLSQLFile.py test.sql
sql_script
  unit_statement
    anonymous_block
      TOKEN='DECLARE'
      seq_of_declare_specs
        declare_spec
          variable_declaration
            identifier
              id_expression
                regular_id
                  TOKEN='l_x'
            type_spec
              datatype
                native_datatype_element
                  TOKEN='NUMBER'
            TOKEN=';'
      TOKEN='BEGIN'
      seq_of_statements
        statement
          sql_statement
            data_manipulation_language_statements
              select_statement
                select_only_statement
                  subquery
                    subquery_basic_elements
                      query_block
                        TOKEN='SELECT'
                        selected_list
                          select_list_elements
                            expression
                              logical_expression
                                unary_logical_expression
                                  multiset_expression
                                    relational_expression
                                      compound_expression
                                        concatenation
                                          model_expression
                                            unary_expression
                                              atom
                                                general_element
                                                  general_element_part
                                                    id_expression
                                                      regular_id
                                                        non_reserved_keywords_pre12c
                                                          TOKEN='length'
                                                    function_argument
                                                      TOKEN='('
                                                      argument
                                                        expression
                                                          logical_expression
                                                            unary_logical_expression
                                                              multiset_expression
                                                                relational_expression
                                                                  compound_expression
                                                                    concatenation
                                                                      model_expression
                                                                        unary_expression
                                                                          atom
                                                                            general_element
                                                                              general_element_part
                                                                                id_expression
                                                                                  regular_id
                                                                                    TOKEN='c1'
                                                      TOKEN=')'
                        into_clause
                          TOKEN='INTO'
                          general_element
                            general_element_part
                              id_expression
                                regular_id
                                  TOKEN='l_x'
                        from_clause
                          TOKEN='FROM'
                          table_ref_list
                            table_ref
                              table_ref_aux
                                table_ref_aux_internal
                                  dml_table_expression_clause
                                    tableview_name
                                      identifier
                                        id_expression
                                          regular_id
                                            TOKEN='the_table'
                        where_clause
                          TOKEN='WHERE'
                          condition
                            expression
                              logical_expression
                                unary_logical_expression
                                  multiset_expression
                                    relational_expression
                                      relational_expression
                                        compound_expression
                                          concatenation
                                            model_expression
                                              unary_expression
                                                atom
                                                  general_element
                                                    general_element_part
                                                      id_expression
                                                        regular_id
                                                          TOKEN='c2'
                                      relational_operator
                                        TOKEN='='
                                      relational_expression
                                        compound_expression
                                          concatenation
                                            model_expression
                                              unary_expression
                                                atom
                                                  constant
                                                    quoted_string
                                                      TOKEN=''X''
        TOKEN=';'
      TOKEN='END'
  TOKEN=';'

But when I run the script against the following package specification, I get an error:

CREATE OR REPLACE PACKAGE pa_tsheet AS
--
  PROCEDURE pr_new_tsheet_template
  (
    p_act_id     IN     timesheets.act_id            %TYPE,
    p_apd_id     IN     timesheets.apd_id            %TYPE,
    p_weekend_yn IN     VARCHAR2,
    p_job_desc   IN     timesheet_items.job_details  %TYPE,
    p_job_rate   IN     timesheet_items.rate         %TYPE,
    p_job_hours  IN     timesheet_items.hours        %TYPE,
    p_tms_id     IN OUT timesheets.id                %TYPE
  );
--
END pa_tsheet;
/

Error:

python3 ./runPLSQLFiles.py ../pa_tsheet.pkh 2>&1 

Traceback (most recent call last):
  File "antlr_plsql/grammars/./runPLSQLFiles.py", line 33, in <module>
    main()
  File "antlr_plsql/grammars/./runPLSQLFiles.py", line 19, in main
    tree = parser.sql_script()
  File "antlr_plsql/grammars/PlSqlParser.py", line 15340, in sql_script
    self.unit_statement()
  File "antlr_plsql/grammars/PlSqlParser.py", line 16370, in unit_statement
    self.create_package()
  File "antlr_plsql/grammars/PlSqlParser.py", line 25046, in create_package
    self.consume()
  File ".local/lib/python3.10/site-packages/antlr4/Parser.py", line 348, in consume
    self.getInputStream().consume()
  File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 101, in consume
    self.index = self.adjustSeekIndex(self.index + 1)
  File ".local/lib/python3.10/site-packages/antlr4/CommonTokenStream.py", line 45, in adjustSeekIndex
    return self.nextTokenOnChannel(i, self.channel)
  File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 214, in nextTokenOnChannel
    self.sync(i)
  File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 112, in sync
    fetched = self.fetch(n)
  File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 124, in fetch
    t = self.tokenSource.nextToken()
  File ".local/lib/python3.10/site-packages/antlr4/Lexer.py", line 137, in nextToken
    ttype = self._interp.match(self._input, self._mode)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 104, in match
    return self.execATN(input, dfa.s0)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 173, in execATN
    target = self.computeTargetState(input, s, t)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 231, in computeTargetState
    self.getReachableConfigSet(input, s.configs, reach, t)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 280, in getReachableConfigSet
    if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon):
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 359, in closure
    currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 357, in closure
    c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 396, in getEpsilonTarget
    if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
  File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 465, in evaluatePredicate
    return self.recog.sempred(None, ruleIndex, predIndex)
  File "antlr_plsql/grammars/PlSqlLexer.py", line 16736, in sempred
    return pred(localctx, predIndex)
  File "antlr_plsql/grammars/PlSqlLexer.py", line 16747, in PROMPT_MESSAGE_sempred
    return this.IsNewlineAtPos(-4)
NameError: name 'this' is not defined

What is the issue?


Solution

  • Both the lexer and the parser grammars contain predicates, which in turn contain target-specific code, such as this.IsNewlineAtPos(-4), as the error from the Python interpreter indicates. That code is copied verbatim into the generated lexer and parser, and this is not a defined name in Python, hence the NameError. Before using the generated lexer and parser in your Python code, you will need to rewrite all of this target-specific code into valid Python code. Most probably this will only mean changing this. into self. (not tested though).