Antlr4 was installed on Ubuntu 22.04 with Python as follows:
wget https://raw.githubusercontent.com/antlr/grammars-v4/master/sql/plsql/Python3/PlSqlLexerBase.py
wget https://raw.githubusercontent.com/antlr/grammars-v4/master/sql/plsql/Python3/PlSqlParserBase.py
wget https://raw.githubusercontent.com/antlr/grammars-v4/master/sql/plsql/PlSqlLexer.g4
wget https://raw.githubusercontent.com/antlr/grammars-v4/master/sql/plsql/PlSqlParser.g4
Install the ANTLR4 runtime for Python:
pip3 install antlr4-python3-runtime==4.13.1
The following test script is used to parse simple SQL and PL/SQL files:
def main():
    """Parse the file named on the command line and print its parse tree.

    Reads the file given as the first command-line argument, feeds its
    text through ``PlSqlLexer`` and ``PlSqlParser``, parses it with the
    ``sql_script`` start rule, and prints the resulting tree via
    ``traverse``.

    Raises:
        SystemExit: if no input file was given on the command line.
    """
    # Fail with a readable usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        raise SystemExit("usage: {0} <sql-file>".format(sys.argv[0]))

    # Only the read needs the file handle; close it before parsing starts.
    with open(sys.argv[1], 'r') as file:
        filesrc = file.read()

    lexer = PlSqlLexer(InputStream(filesrc))
    parser = PlSqlParser(CommonTokenStream(lexer))
    tree = parser.sql_script()
    traverse(tree, parser.ruleNames)
def traverse(tree, rule_names, indent=0):
    """Print an indented outline of a parse tree to stdout.

    Each rule node is printed as its rule name and each terminal node as
    ``TOKEN='<text>'``, prefixed by one space per nesting level. Any node
    whose complete text is exactly ``<EOF>`` is skipped together with its
    subtree.

    Args:
        tree: the parse-tree node to print (rule context or terminal).
        rule_names: sequence mapping a rule index to its rule name, e.g.
            the parser's ``ruleNames``.
        indent: nesting depth of ``tree``; used as the number of leading
            spaces.
    """
    if tree.getText() == "<EOF>":
        return
    if isinstance(tree, TerminalNodeImpl):
        print("{0}TOKEN='{1}'".format(" " * indent, tree.getText()))
        return

    print("{0}{1}".format(" " * indent, rule_names[tree.getRuleIndex()]))
    # The ANTLR Python runtime leaves ``children`` as None (not an empty
    # list) for a rule context that matched nothing, so guard the loop.
    for child in tree.children or ():
        traverse(child, rule_names, indent + 1)
# Run the parser only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
I test the above with the following simple input file, which the Python script processes without errors:
DECLARE
l_x NUMBER;
BEGIN
SELECT length(c1)
INTO l_x
FROM the_table
WHERE c2 = 'X';
END;
Which gives:
python3 ./runPLSQLFile.py test.sql
sql_script
unit_statement
anonymous_block
TOKEN='DECLARE'
seq_of_declare_specs
declare_spec
variable_declaration
identifier
id_expression
regular_id
TOKEN='l_x'
type_spec
datatype
native_datatype_element
TOKEN='NUMBER'
TOKEN=';'
TOKEN='BEGIN'
seq_of_statements
statement
sql_statement
data_manipulation_language_statements
select_statement
select_only_statement
subquery
subquery_basic_elements
query_block
TOKEN='SELECT'
selected_list
select_list_elements
expression
logical_expression
unary_logical_expression
multiset_expression
relational_expression
compound_expression
concatenation
model_expression
unary_expression
atom
general_element
general_element_part
id_expression
regular_id
non_reserved_keywords_pre12c
TOKEN='length'
function_argument
TOKEN='('
argument
expression
logical_expression
unary_logical_expression
multiset_expression
relational_expression
compound_expression
concatenation
model_expression
unary_expression
atom
general_element
general_element_part
id_expression
regular_id
TOKEN='c1'
TOKEN=')'
into_clause
TOKEN='INTO'
general_element
general_element_part
id_expression
regular_id
TOKEN='l_x'
from_clause
TOKEN='FROM'
table_ref_list
table_ref
table_ref_aux
table_ref_aux_internal
dml_table_expression_clause
tableview_name
identifier
id_expression
regular_id
TOKEN='the_table'
where_clause
TOKEN='WHERE'
condition
expression
logical_expression
unary_logical_expression
multiset_expression
relational_expression
relational_expression
compound_expression
concatenation
model_expression
unary_expression
atom
general_element
general_element_part
id_expression
regular_id
TOKEN='c2'
relational_operator
TOKEN='='
relational_expression
compound_expression
concatenation
model_expression
unary_expression
atom
constant
quoted_string
TOKEN=''X''
TOKEN=';'
TOKEN='END'
TOKEN=';'
But when I run it against the following script, I get an error:
CREATE OR REPLACE PACKAGE pa_tsheet AS
--
PROCEDURE pr_new_tsheet_template
(
p_act_id IN timesheets.act_id %TYPE,
p_apd_id IN timesheets.apd_id %TYPE,
p_weekend_yn IN VARCHAR2,
p_job_desc IN timesheet_items.job_details %TYPE,
p_job_rate IN timesheet_items.rate %TYPE,
p_job_hours IN timesheet_items.hours %TYPE,
p_tms_id IN OUT timesheets.id %TYPE
);
--
END pa_tsheet;
/
Error:
python3 ./runPLSQLFiles.py ../pa_tsheet.pkh 2>&1
Traceback (most recent call last):
File "antlr_plsql/grammars/./runPLSQLFiles.py", line 33, in <module>
main()
File "antlr_plsql/grammars/./runPLSQLFiles.py", line 19, in main
tree = parser.sql_script()
File "antlr_plsql/grammars/PlSqlParser.py", line 15340, in sql_script
self.unit_statement()
File "antlr_plsql/grammars/PlSqlParser.py", line 16370, in unit_statement
self.create_package()
File "antlr_plsql/grammars/PlSqlParser.py", line 25046, in create_package
self.consume()
File ".local/lib/python3.10/site-packages/antlr4/Parser.py", line 348, in consume
self.getInputStream().consume()
File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 101, in consume
self.index = self.adjustSeekIndex(self.index + 1)
File ".local/lib/python3.10/site-packages/antlr4/CommonTokenStream.py", line 45, in adjustSeekIndex
return self.nextTokenOnChannel(i, self.channel)
File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 214, in nextTokenOnChannel
self.sync(i)
File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 112, in sync
fetched = self.fetch(n)
File ".local/lib/python3.10/site-packages/antlr4/BufferedTokenStream.py", line 124, in fetch
t = self.tokenSource.nextToken()
File ".local/lib/python3.10/site-packages/antlr4/Lexer.py", line 137, in nextToken
ttype = self._interp.match(self._input, self._mode)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 104, in match
return self.execATN(input, dfa.s0)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 173, in execATN
target = self.computeTargetState(input, s, t)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 231, in computeTargetState
self.getReachableConfigSet(input, s.configs, reach, t)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 280, in getReachableConfigSet
if self.closure(input, config, reach, currentAltReachedAcceptState, True, treatEofAsEpsilon):
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 359, in closure
currentAltReachedAcceptState = self.closure(input, c, configs, currentAltReachedAcceptState, speculative, treatEofAsEpsilon)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 357, in closure
c = self.getEpsilonTarget(input, config, t, configs, speculative, treatEofAsEpsilon)
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 396, in getEpsilonTarget
if self.evaluatePredicate(input, t.ruleIndex, t.predIndex, speculative):
File ".local/lib/python3.10/site-packages/antlr4/atn/LexerATNSimulator.py", line 465, in evaluatePredicate
return self.recog.sempred(None, ruleIndex, predIndex)
File "antlr_plsql/grammars/PlSqlLexer.py", line 16736, in sempred
return pred(localctx, predIndex)
File "antlr_plsql/grammars/PlSqlLexer.py", line 16747, in PROMPT_MESSAGE_sempred
return this.IsNewlineAtPos(-4)
NameError: name 'this' is not defined
What is the issue?
Both the lexer and parser grammars contain predicates, which in turn contain target-specific code, like `this.IsNewlineAtPos(-4)`, as the error from the Python interpreter indicates. Before using the generated lexer and parser in your Python code, you will need to rewrite all of this target-specific code into valid Python code. Most probably this will only mean changing `this.` into `self.` (not tested, though).