javascript, if-statement, boolean-expression

Javascript: If() is not reliable within loop or when based on regex objects with global or sticky flags


I experienced a strange quirk.

My script iterates over a list of image URLs (taken from a textarea input) and analyses each of them for defined structures to find certain types of image IDs. Basically, each URL from the input is tested to see whether it matches a regular expression or contains certain keywords. But when the same URL appears several times in the list, the same if-statement produces different results: sometimes it finds the ID and sometimes it doesn't.

Let me illustrate that with a small snippet (see the code below):

It doesn't make a difference if I split up the if-else statements into separate if-statements. It also makes no difference if I iterate through the URLs with a for(i=0; i < urls.length; i++) loop instead of a for-of loop.

Any ideas what's going on here?

js code snippets

DOESN'T WORK (if inside a loop inside a function):

    /**
     * Classifies every line of a newline-separated URL list as a B, A or C URL
     * and logs the verdict for each row to the console.
     *
     * NOTE(review): this is the version that misbehaves. Both regexes carry the
     * `g` flag and are created once, outside the loop, so `test()` keeps state in
     * `lastIndex` between rows: after a successful match the next `test()` call
     * (even on a different string) resumes searching at that offset and can miss
     * a match that starts earlier in the string.
     *
     * @param {string} url - One or more URLs separated by line breaks.
     */
    function preprocessImgURL(url) {
        const urls = url.trim().split(/\r?\n/);
        // Shared across all loop iterations; the `g` flag makes test() stateful.
        const aRegex = /alpha\d{9,11}z/gi;
        const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
        
        for (let urli = 0; urli < urls.length; urli++) {
            console.log('\n\nPROCESSING NEXT URL');
            
            console.log(urls[urli]);
            if(urls[urli].includes("channelB-")) {
                console.log('Row ' + urli + ' is B Domain + B URL.');
            } else if(aRegex.test(urls[urli])) {
                // A successful test() leaves aRegex.lastIndex at the end of the match.
                console.log('Row ' + urli + ' is A URL.');
            } else if(cRegex.test(urls[urli])) {
                // Same for cRegex: the next row is searched from this lastIndex, not from 0.
                console.log('Row ' + urli + ' is C URL.');
            } else {
        console.log('Row ' + urli + ' doesnt match any criteria. (else statement reached)');
                console.log('Row ' + urli + ' matches b criteria: ' + ( urls[urli].includes("channelB-") ));
                // These diagnostic test() calls run after a failed test() has reset
                // lastIndex to 0, so they can report true for the same string that
                // just failed above, and they advance lastIndex again for the next row.
                console.log('Row ' + urli + ' matches aRegex: ' + aRegex.test(urls[urli]));
                console.log('Row ' + urli + ' matches cRegex: ' + cRegex.test(urls[urli]));
            }
        }
    }
  
  // Sample input: six URLs separated by "\n", in the same order as the rows of
  // the expected/real output listings. Declared with `const` so the snippet does
  // not create an implicit global (which is a ReferenceError in strict mode and
  // in ES modules).
  const exampleUrlString = "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600\nhttps://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg\nhttps://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg\nhttps://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg";
  preprocessImgURL(exampleUrlString);

expected output:
Row 0 is C URL.
Row 1 is A URL.
Row 2 is C URL.
Row 3 is B URL.
Row 4 is C URL.
Row 5 is C URL.
real output (with the final else branch, as in the snippet above):
Row 0 is C URL.
Row 1 is A URL.
Row 2 doesnt match any criteria. (else statement reached)
Row 3 is B URL.
Row 4 doesnt match any criteria. (else statement reached) *
Row 5 doesnt match any criteria. (else statement reached)
real output (when the final else branch is removed):
Row 0 is C URL.
Row 1 is A URL.
[no output as there is no else statement that could handle row 2]
Row 3 is B URL.
Row 4 is C URL. *
[no output as there is no else statement that could handle row 5]

WORKS (external loop, function processes only one url):

/**
 * Classifies a single image URL as a B, A or C URL and logs the verdict.
 *
 * The regular expressions are created anew on every call, so each call starts
 * with `lastIndex === 0` even though the patterns carry the `g` flag.
 *
 * @param {string} url - The URL to classify.
 * @param {number} imgrow - Row number used as a prefix in the log messages.
 * @returns {void}
 */
function preprocessImgURL(url, imgrow) {
    const alphaPattern = /alpha\d{9,11}z/gi;
    const datedPattern = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;

    console.log(url);

    // B criterion: the channel marker plus one of the two B domains.
    const matchesB = url.includes("channelB-") && (url.includes("domainB.de") || url.includes("domain-B.de"));
    const rowLabel = 'Row ' + imgrow;

    if (matchesB) {
        console.log(rowLabel + ' is B Domain + B URL.');
        return;
    }

    if (alphaPattern.test(url)) {
        console.log(rowLabel + ' is A URL.');
        return;
    }

    if (datedPattern.test(url)) {
        console.log(rowLabel + ' is C URL.');
        return;
    }

    // Nothing matched: report each criterion individually for debugging.
    console.log(rowLabel + ' doesnt match any criteria. (else statement reached)');
    console.log(rowLabel + ' matches b criteria: ' + matchesB);
    console.log(rowLabel + ' matches aRegex: ' + alphaPattern.test(url));
    console.log(rowLabel + ' matches cRegex: ' + datedPattern.test(url));
}

// Sample input, one URL per row. Declared with `const` and an array literal:
// a bare `urls = ...` assignment creates an implicit global and throws a
// ReferenceError in strict mode / ES modules.
const urls = [
    "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg",
    "https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600",
    "https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg",
    "https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg",
    "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg",
    "https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg",
];

// The loop lives outside the function, so every URL is classified by its own
// preprocessImgURL() call; the array index doubles as the row number.
for (const [urli, currentUrl] of urls.entries()) {
    console.log('\n\nPROCESSING NEXT URL');
    preprocessImgURL(currentUrl, urli);
}

full html code to reproduce issue

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Create Record</title>
    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.css">
    <style type="text/css">
    .wrapper {
        max-width: 900px;
        margin: auto;
    }
    </style>
</head>
<body>
    <script>
    // Running row counter. It lives outside the function, so it keeps counting
    // across successive calls instead of restarting at 0.
    var imgrow = 0;
    /**
     * Classifies every line of the given newline-separated URL list and writes
     * the verdicts both to the console and into the #result element.
     *
     * NOTE(review): reproduces the reported problem. aRegex/cRegex have the `g`
     * flag and are shared by all loop iterations, so test() resumes from the
     * `lastIndex` left behind by the previous successful match.
     *
     * NOTE(review): the URLs are appended to innerHTML unescaped; the page feeds
     * this function from a textarea, so markup in the input would be rendered.
     *
     * @param {string} url - One or more URLs separated by line breaks.
     */
    function preprocessImgURL(url) {
        const urls = url.trim().split(/\r?\n/);
        const aRegex = /alpha\d{9,11}z/gi;
        const cRegex = /\d{6}_[a-zA-Z]{2,3}-\d{5,8}/gi;
        
        for (let urli = 0; urli < urls.length; urli++) {
            
            console.log(urls[urli]);
            document.getElementById('result').innerHTML += urls[urli] + '<br>';
            if(urls[urli].includes("channelB-") && (urls[urli].includes("domainB.de") || urls[urli].includes("domain-B.de"))) {
                console.log('Row ' + imgrow + ' is B Domain + B URL.');
                document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is B Domain + B URL.' + '<br>';
            } else if(aRegex.test(urls[urli])) {
                console.log('Row ' + imgrow + ' is A URL.');
                document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is A URL.' + '<br>';
            } else if(cRegex.test(urls[urli])) {
                console.log('Row ' + imgrow + ' is C URL.');
                document.getElementById('result').innerHTML += 'Row ' + imgrow + ' is C URL.' + '<br>';
            } else {
                console.log(cRegex);
                document.getElementById('result').innerHTML += cRegex + '<br>';
                // cRegex.test() is called twice on the same string here. Because of
                // the `g` flag the two calls are not equivalent: the first starts at
                // lastIndex 0 (reset by the failed test above) and, on a match, moves
                // lastIndex forward; the second then starts from that position. The
                // console and the page can therefore show different values.
                console.log(cRegex.test(urls[urli]));
                document.getElementById('result').innerHTML += cRegex.test(urls[urli]) + '<br>';
            }
            
            console.log('NEXT URL');
            document.getElementById('result').innerHTML += '<br><br>';
            ++imgrow;
        }
    }
    </script>
    <div class="wrapper">
        <div class="container-fluid">
            <form name="myForm" id="myForm">
                <div class="form-group">
                    <h4>example input</h4>
                    <p style="font-size: 80%;">
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://cdn.domain.de/uploads/sites/4/2022/04/alpha1234567890z.jpg?resize=1024%2C600<br>
https://www.domainy.de/wp-content/uploads/150411_AB-43827__DSC1378.jpg<br>
https://www.domainB.de/wp-content/uploads/2022/07/channelB-881123-maxm-20220805-696x464.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg<br>
https://www.domain.de/wp-content/uploads/200115_AB-55789__DSC1235.jpg
                    </p>
                    <h4>relevant criteria (for now):</h4>
                        <p>A: not B && includes something like alpha[9-11digits]z <br>
                        B: includes channelB- && ( includes domainB.de || includes domain-B.de)<br>
                        C: not A && not B && includes something like [6digits]_[2-3letter]-[5-8digits]<br></p>
                    </div>
                <div class="form-group">
                    <h4>Image URLs (1 per row)</h4>
                    <textarea rows="5" name="imgurls" class="form-control" required onchange="preprocessImgURL(this.value)" style="font-size: 80%;"></textarea>
                </div>
            </form>
            
            <div  style="font-family: monospace;" id="result">
                <h4>Result:</h4>
            </div>
            
            <div style="font-family: monospace;">
                <h4>Expected/experienced Result for example input:</h4>
                <p>Row 0 is C URL. / 🆗 <br>
                Row 1 is A URL. / 🆗 <br>
                Row 2 is C URL. / 🛑 <br>
                Row 3 is B URL. / 🆗 <br>
                Row 4 is C URL. / 🆗 <br>
                Row 5 is C URL. / 🛑 <br></p>
            </div>     
        </div>
    </div>
</body>
</html>


Solution

  • When using the very same regular expression multiple times, like you're doing in the loop, it may remember what it has processed:

    Using test() on a regex with the "global" flag

    When a regex has the global flag set, test() will advance the lastIndex of the regex. (RegExp.prototype.exec() also advances the lastIndex property.)

    Further calls to test(str) will resume searching str starting from lastIndex. The lastIndex property will continue to increase each time test() returns true.

    Note: As long as test() returns true, lastIndex will not reset—even when testing a different string!

    When test() returns false, the calling regex's lastIndex property will reset to 0.

    The following example demonstrates this behavior:

    const regex = /foo/g; // the "global" flag is set
    
    // regex.lastIndex is at 0
    regex.test('foo')     // true
    
    // regex.lastIndex is now at 3
    regex.test('foo')     // false
    
    // regex.lastIndex is at 0
    regex.test('barfoo')  // true
    
    // regex.lastIndex is at 6
    regex.test('foobar')  // false
    
    // regex.lastIndex is at 0
    // (...and so on) 
    

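    In your case, you do not need the global flag. Remove the g from both patterns (for example /alpha\d{9,11}z/i): without the global or sticky flag, test() ignores lastIndex and always searches from the start of the string, so every URL is evaluated independently of the previous one.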