We have the following regexp:
var regexp = /^one (two)+ three/;
So only strings like "one two three"
or "one two three four"
or "one twotwo three"
etc. will match it.
However, if we have a string like
"one "
- it is still 'promising', because it may match once more input is added,
but this string:
"one three"
will never match, no matter what we append to it.
Is there some way to check whether a given string still has a chance of becoming a match?
I need it to show hints while the user is typing: I want to suggest all options that begin with the given input (the regexps I'm using are pretty long and I don't really want to mess with them).
In other words, I want to check whether the end of the string was reached during matching without anything non-matching being encountered.
Put yet another way, the answer lies in the reason the match failed: if the reason is the end of the string, then the input is promising. However, I don't know of any way to check why a string didn't match.
This is a regex feature known as partial matching. It's available in several regex engines such as PCRE, Boost and Java, but not in JavaScript.
Andacious's answer shows a very nice way to overcome this limitation, we just need to automate this.
Well... challenge accepted :)
Fortunately, JavaScript has a very limited regex feature set with a simple syntax, so I wrote a simple parser and on-the-fly transformation for this task, based on the features listed on MDN. This code has been updated to handle ES2018 features.
A couple points of interest:
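- The resulting regex will almost always match. When no partial match is possible, the result of exec is null or an array whose first element is the empty string.
- Negative lookaheads are kept as-is. The only ways to make a match fail are through them (i.e. by putting a (?!) in the regex) and through anchors (^ and $). Lookbehinds (both positive and negative) are also kept as-is.
- The parser assumes a valid input pattern: you cannot create a RegExp object from an invalid pattern in the first place. This may break in the future if new regex features are introduced.
- Backreferences are not handled properly: ^(\w+)\s+\1$ won't yield a partial match against hello hel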
/**
 * Builds a "partial match" version of this regex.
 *
 * The pattern source is re-tokenised and every consuming token X (a literal
 * character, an escape sequence or a whole character class) is rewritten as
 * `(?:X|$)`; every capturing, named or non-capturing group `(...)` is
 * rewritten as `(...|$)`. Each step of the pattern may therefore be satisfied
 * either by the expected input or by the end of the input, which lets the
 * result accept strings that stop early.
 *
 * What is copied through unchanged:
 *  - anchors, alternation bars and quantifiers;
 *  - negative lookaheads and both kinds of lookbehind (their content is not
 *    made optional);
 *  - the flags of the original regex.
 * Positive lookaheads keep their wrapper, but their content is made optional.
 * No capturing group is added, so group numbering is preserved.
 *
 * Because every token can fall back to `$`, an unanchored result can match
 * the empty string at the end of the input. Callers should treat `null` or an
 * empty first element from `exec` as "no partial match".
 *
 * Backreferences are kept as single tokens: the text they refer to cannot
 * itself be partially matched.
 *
 * @returns {RegExp} A new regex built from the transformed source, carrying
 *     the same flags as this regex.
 * @throws {SyntaxError} If the source contains a `(?` group syntax this
 *     parser does not recognise.
 */
RegExp.prototype.toPartialMatchRegex = function() {
    "use strict";

    const re = this;
    const source = re.source;
    // Both `u` and `v` enable code-point semantics and the \u{...} / \p{...}
    // escapes; `unicodeSets` is simply undefined on engines without `v`.
    const unicodeMode = Boolean(re.unicode || re.unicodeSets);
    let i = 0;

    // All probes are sticky so they can only match exactly at position `i`.
    const QUANTIFIER = /\{\d+,?\d*\}/y;
    const GROUP_HEADER = /\(\?(?:<[^>]*>|[a-z-]*:)/y;
    const UNICODE_ONLY_ESCAPES = [
        // A lead + trail surrogate escape denotes ONE code point in unicode mode.
        /\\u[Dd][89ABab][0-9A-Fa-f]{2}\\u[Dd][C-Fc-f][0-9A-Fa-f]{2}/y,
        /\\u\{[0-9A-Fa-f]+\}/y,
        /\\[pP]\{[^}]*\}/y,
    ];
    const COMMON_ESCAPES = [
        /\\c[A-Za-z]/y,
        /\\x[0-9A-Fa-f]{2}/y,
        /\\u[0-9A-Fa-f]{4}/y,
        /\\k<[^>]*>/y,
        // Backreference or legacy octal escape: never split between digits.
        /\\\d+/y,
    ];
    const ESCAPES = unicodeMode
        ? [...UNICODE_ONLY_ESCAPES, ...COMMON_ESCAPES]
        : COMMON_ESCAPES;

    // Length of the text `stickyPattern` matches at the current position, or 0.
    function lengthAt(stickyPattern) {
        stickyPattern.lastIndex = i;
        const match = stickyPattern.exec(source);
        return match === null ? 0 : match[0].length;
    }

    // Consumes `length` characters of the source and returns them.
    function take(length) {
        const text = source.slice(i, i + length);
        i += length;
        return text;
    }

    // Wraps a token so that it may also be satisfied by the end of the input.
    function optional(token) {
        return `(?:${token}|$)`;
    }

    // Length of the escape sequence starting at the current backslash.
    function escapeLength() {
        for (const pattern of ESCAPES) {
            const length = lengthAt(pattern);
            if (length > 0) {
                return length;
            }
        }
        // Any other escape (class escape, identity escape, a malformed
        // \x, \u, \c or \k that legacy syntax reads literally): the backslash
        // plus one character.
        return 2;
    }

    // Length of the character class starting at the current "[".
    function classLength() {
        let depth = 0;
        let j = i;
        while (j < source.length) {
            const ch = source[j];
            if (ch === "\\") {
                j += 2; // skip the escaped character, e.g. "\]"
                continue;
            }
            if (ch === "[") {
                // Classes only nest with the `v` flag; otherwise an inner
                // "[" is an ordinary class member.
                if (depth === 0 || re.unicodeSets) {
                    depth += 1;
                }
            } else if (ch === "]") {
                depth -= 1;
                if (depth === 0) {
                    return j - i + 1;
                }
            }
            j += 1;
        }
        return source.length - i;
    }

    // Length of the literal character at the current position.
    function literalLength() {
        // In unicode mode an astral character is one pattern character made
        // of two UTF-16 units; splitting it would leave lone surrogates.
        return unicodeMode && source.codePointAt(i) > 0xFFFF ? 2 : 1;
    }

    // Transforms the group starting at the current "(" (including its ")").
    function processGroup() {
        if (source[i + 1] !== "?") {
            // Plain capturing group.
            return take(1) + process() + "|$)";
        }

        const start = i;
        switch (source[i + 2]) {
            case ":":
                i += 3;
                return "(?:" + process() + "|$)";

            case "=":
                i += 3;
                return "(?=" + process() + ")";

            case "!":
                // Negative lookahead: skip over it and copy it verbatim.
                i += 3;
                process();
                return source.slice(start, i);

            case "<":
                if (source[i + 3] === "=" || source[i + 3] === "!") {
                    // Lookbehind: skip over it and copy it verbatim.
                    i += 4;
                    process();
                    return source.slice(start, i);
                }
                break;
        }

        // Named group "(?<name>" or modifier group such as "(?i:".
        const headerLength = lengthAt(GROUP_HEADER);
        if (headerLength === 0) {
            // Fail loudly: `i` would otherwise never advance.
            throw new SyntaxError(
                `Unsupported group syntax at index ${i} in /${source}/`
            );
        }
        return take(headerLength) + process() + "|$)";
    }

    // Transforms the source from the current position up to the ")" that
    // closes the current group (which is consumed) or the end of the source.
    function process() {
        let result = "";

        while (i < source.length) {
            switch (source[i]) {
                case "\\":
                    result += optional(take(escapeLength()));
                    break;

                case "[":
                    result += optional(take(classLength()));
                    break;

                case "|":
                case "^":
                case "$":
                case "*":
                case "+":
                case "?":
                    result += take(1);
                    break;

                case "{": {
                    const quantifierLength = lengthAt(QUANTIFIER);
                    // A "{" that does not start a quantifier is a literal.
                    result += quantifierLength > 0
                        ? take(quantifierLength)
                        : optional(take(1));
                    break;
                }

                case "(":
                    result += processGroup();
                    break;

                case ")":
                    ++i;
                    return result;

                default:
                    result += optional(take(literalLength()));
                    break;
            }
        }

        return result;
    }

    return new RegExp(process(), re.flags);
};
// Test code: a small interactive page that shows the transformed regex and
// classifies the current input as a full, partial or failed match.
(function() {
    document.write('<span style="display: inline-block; width: 60px;">Regex: </span><input id="re" value="^one (two)+ three"/><br><span style="display: inline-block; width: 60px;">Input: </span><input id="txt" value="one twotw"/><br><pre id="result"></pre>');
    document.close();

    // Re-evaluates both fields and renders the outcome (or the error thrown
    // while building/transforming the regex) into the result pane.
    function refresh() {
        const resultPane = document.getElementById("result");

        try {
            const original = new RegExp(document.getElementById("re").value);
            const text = document.getElementById("txt").value;
            const partialRe = original.toPartialMatchRegex();
            const partialResult = partialRe.exec(text);

            let verdict;
            if (original.exec(text)) {
                verdict = "Full match";
            } else if (partialResult && partialResult[0]) {
                // A null result or an empty first element means no partial match.
                verdict = "Partial match";
            } else {
                verdict = "No match";
            }

            resultPane.innerText = `${partialRe}\n\n${verdict}\n${JSON.stringify(partialResult)}`;
        } catch (e) {
            resultPane.innerText = e;
        }
    }

    for (const fieldId of ["re", "txt"]) {
        document.getElementById(fieldId).addEventListener("input", refresh);
    }

    refresh();
}());
I tested it a little bit and it seems to work fine, let me know if you find any bugs.