java, regex, arguments, command-line-arguments

Java RegEx for parsing the quoted arguments


I need a Java regex pattern that handles the following scenarios:

Case 1:

Input string:

"a"

Matches:

a

Case 2:

Input string:

"a b"

Matches:

a b

Case 3:

Input string:

"aA Bb" cCc 123 4 5 6 7xy "\"z9" "\"z9$^"

Matches:

aA Bb
cCc
123
4
5
6
7xy
"z9
"z9$^

Case 4:

Input string:

"a b c

Matches:

None - the quotes are unbalanced, so the pattern match should fail.

Case 5:

Input string:

"a b" "c

Matches:

None - the quotes are unbalanced, so the pattern match should fail.

Case 6:

Input string:

"a b" p q r "x y z"

Matches:

a b
p 
q 
r
x y z

Case 7:

Input string:

"a b" p q r "x y \"z\""

Matches:

a b
p 
q
r
x y "z"

Case 8:

Input string:

"a b" p q r "x \"y \"z\""

Matches:

a b
p 
q 
r
x "y "z"

And of course, the simplest one:

Case 9:

Input string:

a b

Matches:

a
b

Tried using a pattern, but it doesn't seem to match all above cases.

/**
 * Splits an argument string into individual arguments.
 *
 * <p>Arguments are separated by whitespace. An argument wrapped in double
 * quotes may contain whitespace; the surrounding quotes are not part of the
 * returned value. A double quote directly preceded by a backslash
 * ({@code \"}) is a literal quote character, both inside and outside a
 * quoted argument, and is returned without the backslash.
 *
 * <p>Each accepted argument is also printed to standard output, one per line.
 *
 * @param argStr the raw argument string to split
 * @return the arguments in order of appearance, or an empty list when the
 *         input cannot be parsed completely (for example when the quotes are
 *         unbalanced)
 */
public List<String> parseArgs(String argStr) {
    List<String> params = new ArrayList<String>();
    // \G forces every match to start exactly where the previous one ended, so
    // find() cannot silently skip over text it does not understand (such as an
    // unterminated quote). The possessive quantifiers (*+ / ++) stop the engine
    // from backtracking an escaped quote into a closing quote.
    // Group 1: contents of a quoted argument. Group 2: an unquoted argument.
    String pattern = "\\G\\s*(?:\"((?:\\\\\"|[^\"])*+)\"|((?:\\\\\"|[^\\s\"])++))";
    Pattern quotedParamPattern = Pattern.compile(pattern);
    Matcher matcher = quotedParamPattern.matcher(argStr);
    int consumed = 0;
    while (matcher.find()) {
        String param = matcher.group(1);
        if (param == null) {
            param = matcher.group(2);
        }
        // Turn the escape sequence \" into a plain quote character.
        params.add(param.replace("\\\"", "\""));
        consumed = matcher.end();
    }
    // Anything other than trailing whitespace left over means the input was
    // malformed, so the whole parse is rejected rather than partially accepted.
    if (!argStr.substring(consumed).trim().isEmpty()) {
        return new ArrayList<String>();
    }
    for (String param : params) {
        System.out.println(param);
    }
    return params;
}

/**
 * Runs {@code parseArgs} over a fixed set of sample argument strings.
 *
 * @param argStr not used by this method; the samples are hard-coded
 */
public void test(String argStr) {
    String[] samples = {"a", "a b", "a b \"c\"", "a b \"c"};
    int index = 0;
    while (index < samples.length) {
        parseArgs(samples[index]);
        index++;
    }
}

Solution

  • I have written a class, "CLIParser", which gives you the desired result.

    // Instantiate the CLIParser with the raw command string to be tokenized.
    
    CLIParser parser = new CLIParser("\"a b\" p q r \"x y z\"");
    
    // Call getTokens() to obtain the parsed tokens. Note that getTokens() is
    // declared as "throws Exception", so the caller must handle or propagate it.
    
    ArrayList<String> resultTokens = parser.getTokens();
    
    
    ###################CLI Parser Class definition#################################
    
    /**
     * Splits a command string into tokens the way a simple command line would.
     *
     * <ul>
     *   <li>Tokens are separated by whitespace; runs of whitespace never
     *       produce empty tokens.</li>
     *   <li>Text between two unescaped double quotes belongs to the current
     *       token, whitespace included. The quotes themselves are dropped.
     *       An empty pair of quotes ({@code ""}) yields an empty token.</li>
     *   <li>A double quote directly preceded by a backslash ({@code \"}) is a
     *       literal quote character; the backslash is dropped.</li>
     * </ul>
     */
    class CLIParser {
        private final String cmdString;

        /**
         * @param cmdString the raw command string to tokenize
         */
        public CLIParser(String cmdString) {
            this.cmdString = cmdString;
        }

        /**
         * Tokenizes the command string passed to the constructor.
         *
         * @return the tokens in order of appearance; empty when the command
         *         string is {@code null}, empty, or only whitespace
         * @throws InvalidCommandException if a quoted section is never closed;
         *         the message contains the index of the unmatched opening quote
         * @throws Exception declared for backward compatibility with callers
         */
        public ArrayList<String> getTokens() throws Exception {
            ArrayList<String> tokens = new ArrayList<String>();
            if (cmdString == null) {
                return tokens;
            }

            StringBuilder token = new StringBuilder();
            boolean inQuotes = false;
            // Tracks whether a token has been started. Needed in addition to the
            // buffer length so that an explicit "" is kept as an empty token.
            boolean tokenStarted = false;
            int openQuotePos = -1;
            int length = cmdString.length();

            for (int i = 0; i < length; i++) {
                char c = cmdString.charAt(i);
                if (c == '\\' && i + 1 < length && cmdString.charAt(i + 1) == '"') {
                    // Escaped quote: keep the quote, drop the backslash, and skip
                    // the quote so it is not mistaken for a delimiter.
                    token.append('"');
                    tokenStarted = true;
                    i++;
                } else if (c == '"') {
                    if (!inQuotes) {
                        openQuotePos = i;
                    }
                    inQuotes = !inQuotes;
                    tokenStarted = true;
                } else if (Character.isWhitespace(c) && !inQuotes) {
                    if (tokenStarted) {
                        tokens.add(token.toString());
                        token.setLength(0);
                        tokenStarted = false;
                    }
                } else {
                    token.append(c);
                    tokenStarted = true;
                }
            }

            if (inQuotes) {
                throw new InvalidCommandException(
                        "Invalid command. Couldn't identify sequence at position "
                                + openQuotePos);
            }
            if (tokenStarted) {
                tokens.add(token.toString());
            }
            return tokens;
        }

        /**
         * Signals that the command string could not be tokenized. Declared here
         * because the parser refers to this type and no other definition of it
         * accompanies the class.
         */
        public static class InvalidCommandException extends Exception {
            private static final long serialVersionUID = 1L;

            public InvalidCommandException(String message) {
                super(message);
            }
        }
    }