javascript node.js nearley

Unexpected WS token: " "


I am using nearley.js and moo.js to make a programming language. In my moo.js lexer there is an `NL` token, which matches new lines; my regex for it is `/[\r\n]+/`, but there seems to be a problem. Following the tutorial that I am using, I get this error:

Unexpected WS token: "    ". Instead, I was expecting to see one of the following:

A identifier token based on:
    var_assign →  ● %identifier _ "=" _ expr
A identifier token based on:
    fun_call →  ● %identifier _ "(" _ fun_call$ebnf$1 ")"

    at Parser.feed (C:\Users\mcqui\OneDrive\Desktop\Programming language for capstones\node_modules\nearley\lib\nearley.js:343:27)
    at main (C:\Users\mcqui\OneDrive\Desktop\Programming language for capstones\parse.js:15:12)

I don't know what to do, because I have checked everything and it still doesn't seem to work. Here is my lexer.js file:

const moo = require('moo')
const fs = require("mz/fs")

// Token table for the language, compiled by moo into one lexer object.
// NOTE(review): the order of the regex rules is presumably significant when
// two rules can match at the same position -- confirm against the moo docs
// before reordering anything.
let lexer = moo.compile({
  // Horizontal whitespace only (spaces and tabs); newlines are lexed as NL.
  WS:      /[ \t]+/,
  // Line comment: "//" up to the end of the line.
  comment: /\/\/.*?$/,
  // Non-negative integer without leading zeros.
  number:  /0|[1-9][0-9]*/,
  // Double-quoted string; only \" and \\ are accepted as escapes and a raw
  // newline is not allowed inside the quotes.
  string:  /"(?:\\["\\]|[^\n"\\])*"/,
  lparen:  '(',
  rparen:  ')',
  lbrace:  '{',
  rbrace:  '}',
  // A letter followed by letters, digits or underscores.
  identifier: /[a-zA-Z][a-zA-Z_0-9]*/,
  fatarrow: '=>',
  assign: '=',
  // One or more consecutive CR/LF characters form a single NL token.
  // Indentation on the following line is NOT part of NL; it arrives as a
  // separate WS token.
  NL: { match: /[\r\n]+/, lineBreaks: true }
});

// The grammar file loads this lexer via require("./lexer").
module.exports = lexer;


/**
 * Debug helper: reads "main.kpp", hands its text to the lexer and prints
 * every token until the lexer returns a falsy value.
 */
async function main(){
    const source = await fs.readFile("main.kpp")
    lexer.reset(source.toString())
    for (let token = lexer.next(); token; token = lexer.next()){
        console.log(token);
    }
}

Here is my parse.js file:

const nearley = require("nearley");
const grammar = require("./kpp.js");
const fs = require('mz/fs');

/**
 * Parses the .kpp source file named by the first command-line argument and,
 * when the parser produces exactly one result, writes that result as JSON to
 * a sibling file whose trailing ".kpp" is replaced by ".ast".
 *
 * Logs a short message and returns without writing anything when no file
 * name is given, when the parser reports more than one result, or when it
 * reports none. Failures from reading the file or from `parser.feed` are not
 * caught here; they reject the returned promise.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const filename = process.argv[2];
    if (!filename) {
        console.log("Provide a .kpp file");
        return;
    }

    const code = (await fs.readFile(filename)).toString();
    const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar));
    parser.feed(code);

    const resultCount = parser.results.length;
    if (resultCount > 1) {
        // Several results: the input could be parsed in more than one way.
        console.log('Error');
        return;
    }
    if (resultCount === 0) {
        console.log("Parse error");
        return;
    }

    const ast = parser.results[0];
    // Swap only a trailing ".kpp". A plain String.replace would rewrite the
    // first ".kpp" anywhere in the path (e.g. in a directory name) and would
    // leave a name without ".kpp" unchanged, so the AST would overwrite the
    // source file itself.
    const sourceExtension = '.kpp';
    const outputFilename = filename.endsWith(sourceExtension)
        ? `${filename.slice(0, -sourceExtension.length)}.ast`
        : `${filename}.ast`;
    await fs.writeFile(outputFilename, JSON.stringify(ast, null, " "));
    console.log(`WROTE ${outputFilename}.`);
}

// Start the CLI; if main() rejects, print the error's stack trace.
main().catch((error) => {
    console.log(error.stack);
});

Here is the file that I am running (written in my own programming language):

f = () => 1
g = (a b) => add(multiply(2 a) b)
h = (x y) => {
    show("x=" x)
    show("y=" y)
    g(x y)
}
result = h(3 4)
show("result =" result)
show("f =" f())
show("f(gf f) =" g(f() f()))

And here is the nearley.js config file:

@{%
const myLexer = require("./lexer")
%}

@lexer myLexer

# statements: one or more statements separated by %NL tokens, returned as a
# flat array.
# NOTE(review): neither alternative allows whitespace (`_`) before a
# statement, so a %WS token right after %NL (an indented line) cannot be
# matched here -- this looks like the source of the "Unexpected WS token"
# error quoted in the question.
statements
    ->statement
    {%
        (data) => {
            // Single statement: start the list.
            return [data[0]]
        }
    %}
    | statements %NL statement
    {%
        (data) => {
            // data = [statements so far, NL token, next statement]
            return [...data[0], data[2]]
        }
    %}

# statement: either a variable assignment or a function call. `id` is
# nearley's built-in postprocessor, used here to pass the single matched
# child through.
statement
    -> var_assign  {% id %}
    | fun_call     {% id %}



# var_assign: `name = expr`, with optional whitespace on both sides of "=".
# Produces a node { type: "var_assign", var_name, value }.
var_assign
    -> %identifier _ "=" _ expr
        {%
            (data) => {
                // data = [identifier, _, "=", _, expr]
                return {
                    type: "var_assign",
                    var_name: data[0],
                    value: data[4]
                }
            }
        %}

# fun_call: `name(args)`, where the argument list is optional.
# Produces a node { type: 'fun_call', fun_name, arguments }.
fun_call
    -> %identifier _ "(" _ (arg_list _):? ")"
    {%
        (data) => {
            // data = [identifier, _, "(", _, optional [arg_list, _], ")"]
            // When the optional group is absent data[4] is falsy and the
            // call gets an empty argument array.
            return {
                type: 'fun_call',
                fun_name: data[0],
                arguments: data[4] ? data[4][0] : []
            }
        }
    %}

# arg_list: one or more expressions separated by mandatory whitespace (`__`);
# there is no comma separator. Returned as a flat array of expressions.
arg_list
    -> expr
    {%
        (data) => {
            // First argument: start the list.
            return [data[0]]
        }
    %}
    | arg_list __ expr
    {%
        (data) => {
            // data = [arguments so far, __, next expr]
            return [...data[0], data[2]]
        }
    %}

# expr: a string, number or identifier token, a function call, or a lambda
# (spelled `lamba` throughout this grammar). Each alternative is passed
# through with `id`.
expr
    -> %string    {% id %}
    | %number     {% id %}
    | %identifier {% id %}
    | fun_call    {% id %}
    | lamba       {% id %}

# lamba: `(params) => body`, where the parameter list is optional.
# Produces a node { type: "lamba", parameters, body }.
# NOTE(review): "lamba" is presumably a misspelling of "lambda"; it is used
# consistently as the rule name and as the node type string, so renaming it
# would change the emitted AST.
lamba -> "(" _ (param_list _):? ")" _ "=>" _ lamba_body
    {%
        (data) => {
            // data = ["(", _, optional [param_list, _], ")", _, "=>", _, lamba_body]
            return {
                type: "lamba",
                parameters: data[2] ? data[2][0] : [],
                body: data[7]
            }
        }
    %}

# param_list: one identifier followed by zero or more identifiers, each
# preceded by mandatory whitespace. Returned as a flat array of identifiers.
param_list
    -> %identifier (__ %identifier):*
    {%
        (data) => {
            // data[1] is the list of repeated [__, identifier] groups;
            // keep only the identifier from each group.
            const repeatedPieces = data[1];
            const restParams = repeatedPieces.map(piece => piece[1])
            return [data[0], ...restParams]
        }
    %}

# lamba_body: either a single expression (wrapped in a one-element array) or
# a braced block of statements; both forms return an array.
# NOTE(review): in the block form the `_` comes before the first %NL and
# after the last one, so whitespace that follows a newline (the indentation
# of a body line) is not accepted -- compare the "Unexpected WS token" error
# quoted in the question.
lamba_body
    -> expr
        {%
            (data) => {
                return [data[0]];
            }
        %}
    | "{" _ %NL statements %NL _ "}"
        {%
            (data) => {
                // data = ["{", _, NL, statements, NL, _, "}"]
                return data[3];
            }
        %}

# Optional whitespace: zero or more %WS tokens.
_ -> %WS:*

# Mandatory whitespace: one or more %WS tokens.
__ -> %WS:+

Solution

  • The problem here comes from the fact that your grammar does not say anything about indentation (leading spaces or tabs). The rule `statements %NL statement` clearly tells the parser that you expect nothing other than one or more newline characters between two statements, so the `WS` token produced by the indentation of the next line is unexpected. I would suggest that you do not use a newline by itself as the end of a statement, but instead expect `whitespace (newline whitespace):*`. You could also use a technique similar to the one that this lexer and this whitespace rule use.