java antlr

In ANTLR 3, how do I generate a lexer (and parser) at runtime instead of ahead of time?


I want to generate an ANTLR lexer at runtime -- that is, generate the grammar and, from the grammar, generate the lexer class and its supporting bits at runtime. I am happy to feed it into the Java compiler, which is accessible at runtime.


Solution

  • Here's a quick and dirty way to:

    1. generate a combined (!) ANTLR grammar .g file given a String as grammar-source,
    2. and create a Parser & Lexer from this .g file,
    3. compile these Parser & Lexer .java files,
    4. create instances of the Parser & Lexer classes and invoke the entry point of the parser.

    Main.java

    import java.io.*;
    import javax.tools.*;
    import java.lang.reflect.*;
    import org.antlr.runtime.*;
    import org.antlr.Tool;
    
    /**
     * Demonstrates generating, compiling and running an ANTLR 3 lexer and parser at runtime.
     *
     * <p>The program writes a combined grammar to a {@code .g} file in the current directory,
     * runs the ANTLR {@link Tool} on it, compiles the generated {@code .java} sources with the
     * system Java compiler, then loads the resulting classes reflectively and invokes the
     * parser's entry rule on the first command line argument.
     */
    public class Main {
    
        /**
         * Runs the whole generate / compile / parse pipeline.
         *
         * @param args {@code args[0]} is the text to parse
         * @throws IllegalArgumentException if no text to parse was supplied
         * @throws IllegalStateException if no system Java compiler is available or if compiling
         *         a generated source file fails
         * @throws Exception on I/O or reflection failures
         */
        public static void main(String[] args) throws Exception {
    
            // Fail fast, before any files are generated, when there is nothing to parse.
            if (args.length < 1) {
                throw new IllegalArgumentException("Usage: java Main <text-to-parse>");
            }
    
            // The grammar which echoes the parsed characters to the console,
            // skipping any white space chars.
            final String grammar =
                    "grammar T;                                                  \n" +
                    "                                                            \n" +
                    "parse                                                       \n" +
                    "  :  (ANY {System.out.println(\"ANY=\" + $ANY.text);})* EOF \n" +
                    "  ;                                                         \n" +
                    "                                                            \n" +
                    "SPACE                                                       \n" +
                    "  :  (' ' | '\\t' | '\\r' | '\\n') {skip();}                \n" +
                    "  ;                                                         \n" +
                    "                                                            \n" +
                    "ANY                                                         \n" +
                    "  :  .                                                      \n" +
                    "  ;                                                           ";
            final String grammarName = "T";
            final String entryPoint = "parse";
    
            // 1 - Write the `.g` grammar file to disk.
            writeGrammarFile(grammarName, grammar);
    
            // 2 - Generate the lexer and parser.
            Tool tool = new Tool(new String[]{grammarName + ".g"});
            tool.process();
    
            // 3 - Compile the lexer and parser.
            JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
            if (compiler == null) {
                // getSystemJavaCompiler() returns null when no compiler is provided (e.g. JRE only).
                throw new IllegalStateException(
                        "No system Java compiler available; run this program with a JDK");
            }
            compile(compiler, grammarName + "Lexer.java");
            compile(compiler, grammarName + "Parser.java");
    
            // 4 - Parse the command line parameter using the dynamically created lexer and
            //     parser with a bit of reflection.
            parse(grammarName, entryPoint, args[0]);
        }
    
        /**
         * Writes the grammar source to {@code <grammarName>.g} in the current directory.
         * The writer is closed even when writing fails.
         */
        private static void writeGrammarFile(String grammarName, String grammar) throws IOException {
            Writer out = new BufferedWriter(new FileWriter(new File(grammarName + ".g")));
            try {
                out.write(grammar);
            } finally {
                out.close();
            }
        }
    
        /**
         * Compiles a single generated source file and fails loudly when the compiler reports
         * an error, instead of letting a later class lookup fail with a less helpful message.
         */
        private static void compile(JavaCompiler compiler, String sourceFile) {
            // The empty -sourcepath is kept from the original invocation.
            int result = compiler.run(null, System.out, System.err, "-sourcepath", "", sourceFile);
            if (result != 0) {
                throw new IllegalStateException(
                        "Compilation of " + sourceFile + " failed with exit code " + result);
            }
        }
    
        /**
         * Loads the generated lexer and parser classes by name, wires them together and
         * invokes the parser's entry rule on the given input.
         */
        private static void parse(String grammarName, String entryPoint, String input) throws Exception {
            Lexer lexer = (Lexer) Class.forName(grammarName + "Lexer")
                    .getDeclaredConstructor()
                    .newInstance();
            lexer.setCharStream(new ANTLRStringStream(input));
            CommonTokenStream tokens = new CommonTokenStream(lexer);
    
            Class<?> parserClass = Class.forName(grammarName + "Parser");
            Constructor<?> parserCtor = parserClass.getConstructor(TokenStream.class);
            Parser parser = (Parser) parserCtor.newInstance(tokens);
    
            Method entryPointMethod = parserClass.getMethod(entryPoint);
            entryPointMethod.invoke(parser);
        }
    }
    

    Which, after compiling and running it like this (on *nix):

    java -cp .:antlr-3.2.jar Main "a b    c"
    

    or on Windows

    java -cp .;antlr-3.2.jar Main "a b    c"
    

    , produces the following output:

    ANY=a
    ANY=b
    ANY=c