I experienced a strange quirk.
My script iterates over a list of image URLs (taken from a textarea input) and analyses each of them for defined structures to find some type of image IDs. Basically the url from the input is tested, to see if it includes some regular expression or keywords. But when I iterate over the same URL several times, the same if-statement produces various results. Sometimes the it finds the ID and sometimes it doesn't.
I better illustrate that with a small snippet:
It doesn't make a difference if I split up the if-else statements into separate if-statements. It also makes no difference if i iterate through the URLs with for(i=0; i < urls.length; i++)
loop instead of a for-of loop.
Any ideas what's going on here?
function preprocessImgURL(url) {
const urls = url.trim().split(/\r?\n/);
const aRegex = /alpha\d{9,11}z/gi;
const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
for (let urli = 0; urli < urls.length; urli++) {
console.log('\n\nPROCESSING NEXT URL');
console.log(urls[urli]);
if(urls[urli].includes("channelB-")) {
console.log('Row ' + urli + ' is B Domain + B URL.');
} else if(aRegex.test(urls[urli])) {
console.log('Row ' + urli + ' is A URL.');
} else if(cRegex.test(urls[urli])) {
console.log('Row ' + urli + ' is C URL.');
} else {
console.log('Row ' + urli + ' doesnt match any criteria. (else statement reached)');
console.log('Row ' + urli + ' matches b criteria: ' + ( urls[urli].includes("channelB-") ));
console.log('Row ' + urli + ' matches aRegex: ' + aRegex.test(urls[urli]));
console.log('Row ' + urli + ' matches cRegex: ' + cRegex.test(urls[urli]));
}
}
}
exampleUrlString = "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600\nhttps://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg\nhttps://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg";
preprocessImgURL(exampleUrlString);
Row 0 is C URL.
Row 1 is A URL.
Row 2 is C URL.
Row 3 is B URL.
Row 4 is C URL.
Row 5 is C URL.
Row 0 is C URL.
Row 1 is A URL.
Row 2 doesnt match any criteria. (else statement reached)
Row 3 is B URL.
Row 4 doesnt match any criteria. (else statement reached) *
Row 5 doesnt match any criteria. (else statement reached)
function preprocessImgURL(url) {
const urls = url.trim().split(/\r?\n/);
const aRegex = /alpha\d{9,11}z/gi;
const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
for (let urli = 0; urli < urls.length; urli++) {
console.log('\n\nPROCESSING NEXT URL');
console.log(urls[urli]);
if(urls[urli].includes("channelB-")) {
console.log('Row ' + urli + ' is B Domain + B URL.');
} else if(aRegex.test(urls[urli])) {
console.log('Row ' + urli + ' is A URL.');
} else if(cRegex.test(urls[urli])) {
console.log('Row ' + urli + ' is C URL.');
}
}
}
exampleUrlString = "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600\nhttps://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg\nhttps://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg";
preprocessImgURL(exampleUrlString);
Row 0 is C URL.
Row 1 is A URL.
[no output as there is no else statement that could handle row 2]
Row 3 is B URL.
Row 4 is C URL. *
[no output as there is no else statement that could handle row 5]
function preprocessImgURL(url, imgrow) {
const aRegex = /alpha\d{9,11}z/gi;
const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
console.log(url);
if(url.includes("channelB-") && (url.includes("domainB.de") || url.includes("domain-B.de"))) {
console.log('Row ' + imgrow + ' is B Domain + B URL.');
} else if(aRegex.test(url)) {
console.log('Row ' + imgrow + ' is A URL.');
} else if(cRegex.test(url)) {
console.log('Row ' + imgrow + ' is C URL.');
} else {
console.log('Row ' + imgrow + ' doesnt match any criteria. (else statement reached)');
console.log('Row ' + imgrow + ' matches b criteria: ' + ( url.includes("channelB-") && (url.includes("domainB.de") || url.includes("domain-B.de"))));
console.log('Row ' + imgrow + ' matches aRegex: ' + aRegex.test(url));
console.log('Row ' + imgrow + ' matches cRegex: ' + cRegex.test(url));
}
}
urls = new Array(
"https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg",
"https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600",
"https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg",
"https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg",
"https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg", "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg"
);
for (let urli = 0; urli < urls.length; urli++) {
console.log('\n\nPROCESSING NEXT URL');
preprocessImgURL(urls[urli], urli);
}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Create Record</title>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.css">
<style type="text/css">
.wrapper {
max-width: 900px;
margin: auto;
}
</style>
</head>
<body>
<script>
var imgrow = 0;
function preprocessImgURL(url) {
const urls = url.trim().split(/\r?\n/);
const aRegex = /alpha\d{9,11}z/gi;
const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
for (let urli = 0; urli < urls.length; urli++) {
console.log(urls[urli]);
document.getElementById('result').innerHTML += urls[urli] + '<br>';
if(urls[urli].includes("channelB-") && (urls[urli].includes("domainB.de") || urls[urli].includes("domain-B.de"))) {
console.log('Row ' + imgrow + ' is B Domain + B URL.');
document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is B Domain + B URL.' + '<br>';
} else if(aRegex.test(urls[urli])) {
console.log('Row ' + imgrow + ' is A URL.');
document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is A URL.' + '<br>';
} else if(cRegex.test(urls[urli])) {
console.log('Row ' + imgrow + ' is C URL.');
document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is C URL.' + '<br>';
} else {
console.log(cRegex);
document.getElementById('result').innerHTML += cRegex + '<br>';
console.log(cRegex.test(urls[urli]));
document.getElementById('result').innerHTML += cRegex.test(urls[urli]) + '<br>';
}
console.log('NEXT URL');
document.getElementById('result').innerHTML += '<br><br>';
++imgrow;
}
}
</script>
<div class="wrapper">
<div class="container-fluid">
<form name="myForm" id="myForm">
<div class="form-group">
<h4>example input</h4>
<p style="font-size: 80%;">
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600<br>
https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg<br>
https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg
</p>
<h4>relevant criteria (for now):</h4>
<p>A: not B && includes something like alpha[9-11digits]z <br>
B: includes channelB- && ( includes domainB.de || includes domain-B.de)<br>
C: not A && not B && includes something like [6digits]_[2-3letter]-[5-8digits]<br></p>
</div>
<div class="form-group">
<h4>Image URLs (1 per row)</h4>
<textarea rows="5" name="imgurls" class="form-control" required onchange="preprocessImgURL(this.value)" style="font-size: 80%;"></textarea>
</div>
</form>
<div style="font-family: monospace;" id="result">
<h4>Result:</h4>
</div>
<div style="font-family: monospace;">
<h4>Expected/experienced Result for example input:</h4>
<p>Row 0 is C URL. / π <br>
Row 1 is A URL. / π <br>
Row 2 is C URL. / π <br>
Row 3 is B URL. / π <br>
Row 4 is C URL. / π <br>
Row 5 is C URL. / π <br></p>
</div>
</div>
</div>
</body>
</html>
When using the very same regular expression multiple times, like you're doing in the loop, it may remember what it has processed:
Using test() on a regex with the "global" flag
When a regex has the global flag set,
test()
will advance thelastIndex
of the regex. (RegExp.prototype.exec()
also advances thelastIndex
property.)Further calls to
test(str)
will resume searchingstr
starting fromlastIndex
. ThelastIndex
property will continue to increase each timetest()
returnstrue
.Note: As long as
test()
returnstrue
,lastIndex
will not resetβeven when testing a different string!When
test()
returnsfalse
, the calling regex'slastIndex
property will reset to0
.The following example demonstrates this behavior:
const regex = /foo/g; // the "global" flag is set // regex.lastIndex is at 0 regex.test('foo') // true // regex.lastIndex is now at 3 regex.test('foo') // false // regex.lastIndex is at 0 regex.test('barfoo') // true // regex.lastIndex is at 6 regex.test('foobar') // false // regex.lastIndex is at 0 // (...and so on)
In your case, you do not need the global flag.