Tags: javascript, regex, node.js, promise, esprima

How to identify the following code patterns


I have a pattern of js promises that I want to identify for several keywords

For example if I put code like:

var deferred = Q.defer();

And in the file I have also the following respective value

deferred.reject(err);
deferred.resolve();
return deferred.promise;

The complete code

EXAMPLE 1

// Example 1 of the pattern to detect: a deferred object is created, settled
// from inside a callback, and its promise is returned to the caller.
// NOTE(review): Q and fs are not defined in this snippet - presumably the Q
// promise library and Node's fs module; confirm.
function writeError(errMessage) {
    var deferred = Q.defer();
    // write errMessage to "errors.log"; the callback receives err
    fs.writeFile("errors.log", errMessage, function (err) {
        if (err) {
            // the write reported an error: reject the deferred with it
            deferred.reject(err);
        } else {
            // no error reported: resolve the deferred without a value
            deferred.resolve();
        }
    });
    // hand back the promise side of the deferred created above
    return deferred.promise;
}

I want to pass in a large code file (as a string) and find out whether the file contains this pattern.

Another example

var d = Q.defer(); /* or $q.defer */

And in the file you have also the following respective value

d.resolve(val);
d.reject(err); 
return d.promise;

Complete EXAMPLE 2

// Example 2 of the pattern to detect: a deferred `d` is created, resolved or
// rejected from the handlers attached to myPromiseFn(param+1), and d.promise
// is returned.
// NOTE(review): as written this snippet is not syntactically complete - the
// `Promise(function(resolve, reject) {` opened below is never closed.
function getStuffDone(param) {           
    var d = Q.defer(); /* or $q.defer */ 

    Promise(function(resolve, reject) {
        // or = new $.Deferred() etc.        
        myPromiseFn(param+1)                 
        .then(function(val) { /* or .done */ 
            d.resolve(val);                  
        }).catch(function(err) { /* .fail */ 
            d.reject(err);                   
        });                                  
        return d.promise; /* or promise() */ 

}                  

Are there open-source tools that can be used for this kind of analysis (you provide a pattern and the tool finds the matches)?

There are some more complex patterns with childProcess, but for now this is OK :)


Solution

  • UPDATE: I made one correction to the code, i.e. changed set[2] to set[set.length - 1] to accommodate query sets of any size. I then applied the exact same algorithm to your two examples.

    The solution I provide follows some rules that I think are reasonable for the type of search you are proposing. Assume you are looking for four lines, ABCD (case insensitive, so it will find ABCD or abcd or aBcD):

    The code snippet below shows an example search. It does not demonstrate all of the edge cases. However, it does show the overall functionality.

    // the query: one regex source string per sought line, in the order
    //   in which the matching lines must appear
    var queryRegexStrs = [
      "I( really)? (like|adore) strawberry",
      "I( really)? (like|adore) chocolate",
      "I( really)? (like|adore) vanilla"
    ];

    // the text being searched, written as one array element per line;
    //   the final empty element makes the joined text end with "\n"
    var codeStr = [
      "....",
      "Most people would say 'I like vanilla'",
      "....",
      "....",
      "....",
      "....",
      "Amir's taste profile:",
      "....",
      "I like strawberry",
      "....",
      "....",
      "I told Billy that I really adore chocolate a lot",
      "....",
      "I like vanilla most of the time",
      "....",
      "Let me emphasize that I like strawberry",
      "....",
      "....",
      "....",
      "....",
      "Juanita's taste profile:",
      "....",
      "I really adore strawberry",
      "I like vanilla",
      "....",
      "....",
      "....",
      "....",
      "Rachel's taste profile:",
      "I adore strawberry",
      "....",
      "Sometimes I like chocolate, I guess",
      "....",
      "I adore vanilla",
      "....",
      "....",
      "....",
      "....",
      ""
    ].join("\n");

    // the line separator handed to search(); a different end-of-line
    //   character or character sequence can be substituted here
    var endOfLineStr = "\n";

    // run the search; each entry of the result holds the line numbers of one match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds ordered sets of code lines that match a sequence of query regexes.
     *
     * The code string is split into lines on endOfLineStr. A match set is complete
     * once lines matching query 0, query 1, ... query N-1 have been seen in that
     * order; any number of lines that match none of the queries may lie in between.
     *
     * Matching rules:
     *   - each line is classified by the first query (in query order) that it matches,
     *     so a line is recorded for at most one query and a single line can never
     *     satisfy several consecutive queries on its own;
     *   - a line matching the first query always (re)starts a match set at that line;
     *   - a line whose first matching query is not the one expected next discards
     *     the partially built match set;
     *   - the regexes are compiled without flags, so matching is case-sensitive.
     *
     * @param {string[]} queryRegexStrs - regex source strings, one per sought line, in order
     * @param {string} codeStr - the code to search
     * @param {string} endOfLineStr - the separator used to split codeStr into lines
     * @returns {number[][]} one array of 0-based line numbers per complete match set
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(endOfLineStr);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regexes in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // holds every complete match set found (there may be several per file)
      var matchSets = [];

      // line numbers collected so far for the match set currently being built
      var currMatchSet = [];

      // index of the query that has to be matched next
      var idxOfCurrQuery = 0;

      // start looking again for the first query and forget any partial match set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // index of the first query regex that matches the line, or -1 if none does;
      //   stopping at the first hit is what keeps one line from being recorded
      //   for several queries
      var findFirstMatchingQuery = function(codeLine) {
        for (var regexNum = 0; regexNum < numQueryLines; regexNum += 1) {
          if (queryRegexs[regexNum].test(codeLine)) {
            return regexNum;
          }
        }
        return -1;
      };

      // check each line of code
      codeLines.forEach(function(codeLine, codeLineNum) {

        var regexNum = findFirstMatchingQuery(codeLine);

        // lines that match no query neither advance nor interrupt a match set
        if (regexNum === -1) {
          return;
        }

        // a match of the first query always starts a fresh match set
        if (regexNum === 0) {
          resetCurrQuery();
        }

        if (regexNum === idxOfCurrQuery) {

          // the expected query was matched: record the line and move on
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }

        } else {

          // an out-of-sequence match interrupts the current match set
          resetCurrQuery();
        }
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);

    Here is the exact same algorithm, applied to your original examples 1 and 2 (example 1 first). Note a couple of things. First, anything that needs escaping in the regex strings actually needs double-escaping, because each regex is written as a JavaScript string literal: e.g. in order to find a literal opening parenthesis you need to write "\\(", not just "\(". Also, the regexes may seem a little complex. I have two comments about this. First, a lot of that is just matching the literal periods and parentheses. Second, and more importantly, the ability to use complex regexes is part of the power (read "flexibility") of this entire approach; e.g. the examples you provided required some alternation, where "a|b" means "find a OR b".

    // the query: one regex source string per sought line, in the order
    //   in which the matching lines must appear
    var queryRegexStrs = [
      "var deferred = Q\\.defer\\(\\);",
      "deferred\\.reject\\(err\\);",
      "deferred\\.resolve\\(\\);",
      "return deferred\\.promise;"
    ];

    // the code being searched, written as one array element per line;
    //   the final empty element makes the joined text end with "\n"
    var codeStr = [
      'function writeError(errMessage) {',
      '    var deferred = Q.defer();',
      '    fs.writeFile("errors.log", errMessage, function (err) {',
      '        if (err) {',
      '            deferred.reject(err);',
      '        } else {',
      '            deferred.resolve();',
      '        }',
      '    });',
      '    return deferred.promise;',
      '}',
      ''
    ].join("\n");

    // the line separator handed to search(); a different end-of-line
    //   character or character sequence can be substituted here
    var endOfLineStr = "\n";

    // run the search; each entry of the result holds the line numbers of one match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds ordered sets of code lines that match a sequence of query regexes.
     *
     * The code string is split into lines on endOfLineStr. A match set is complete
     * once lines matching query 0, query 1, ... query N-1 have been seen in that
     * order; any number of lines that match none of the queries may lie in between.
     *
     * Matching rules:
     *   - each line is classified by the first query (in query order) that it matches,
     *     so a line is recorded for at most one query and a single line can never
     *     satisfy several consecutive queries on its own;
     *   - a line matching the first query always (re)starts a match set at that line;
     *   - a line whose first matching query is not the one expected next discards
     *     the partially built match set;
     *   - the regexes are compiled without flags, so matching is case-sensitive.
     *
     * @param {string[]} queryRegexStrs - regex source strings, one per sought line, in order
     * @param {string} codeStr - the code to search
     * @param {string} endOfLineStr - the separator used to split codeStr into lines
     * @returns {number[][]} one array of 0-based line numbers per complete match set
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(endOfLineStr);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regexes in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // holds every complete match set found (there may be several per file)
      var matchSets = [];

      // line numbers collected so far for the match set currently being built
      var currMatchSet = [];

      // index of the query that has to be matched next
      var idxOfCurrQuery = 0;

      // start looking again for the first query and forget any partial match set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // index of the first query regex that matches the line, or -1 if none does;
      //   stopping at the first hit is what keeps one line from being recorded
      //   for several queries
      var findFirstMatchingQuery = function(codeLine) {
        for (var regexNum = 0; regexNum < numQueryLines; regexNum += 1) {
          if (queryRegexs[regexNum].test(codeLine)) {
            return regexNum;
          }
        }
        return -1;
      };

      // check each line of code
      codeLines.forEach(function(codeLine, codeLineNum) {

        var regexNum = findFirstMatchingQuery(codeLine);

        // lines that match no query neither advance nor interrupt a match set
        if (regexNum === -1) {
          return;
        }

        // a match of the first query always starts a fresh match set
        if (regexNum === 0) {
          resetCurrQuery();
        }

        if (regexNum === idxOfCurrQuery) {

          // the expected query was matched: record the line and move on
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }

        } else {

          // an out-of-sequence match interrupts the current match set
          resetCurrQuery();
        }
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);

    Here is the exact same algorithm, just using your original example 2:

    // the query: one regex source string per sought line, in the order
    //   in which the matching lines must appear
    var queryRegexStrs = [
      "var d = (Q\\.defer\\(\\)|\\$q\\.defer);",
      "d\\.resolve\\(val\\);",
      "d\\.reject\\(err\\);",
      "return d\\.promise(\\(\\))?;"
    ];

    // the code being searched, written as one array element per line;
    //   the final empty element makes the joined text end with "\n"
    var codeStr = [
      "....",
      "....",
      "....",
      "function getStuffDone(param) {",
      "    var d = Q.defer();",
      "",
      "    Promise(function(resolve, reject) {",
      "        // or = new $.Deferred() etc.",
      "        myPromiseFn(param+1)",
      "        .then(function(val) { /* or .done */",
      "            d.resolve(val);",
      "        }).catch(function(err) { /* .fail */",
      "            d.reject(err);",
      "        });",
      "        return d.promise;",
      "",
      "}",
      "....",
      "....",
      "....",
      "function getStuffDone(param) {",
      "    var d = $q.defer;",
      "",
      "    Promise(function(resolve, reject) {",
      "        // or = new $.Deferred() etc.",
      "        myPromiseFn(param+1)",
      "        .then(function(val) { /* or .done */",
      "            d.resolve(val);",
      "        }).catch(function(err) { /* .fail */",
      "            d.reject(err);",
      "        });",
      "        return d.promise();",
      "",
      "}",
      "....",
      "....",
      "....",
      ""
    ].join("\n");

    // the line separator handed to search(); a different end-of-line
    //   character or character sequence can be substituted here
    var endOfLineStr = "\n";

    // run the search; each entry of the result holds the line numbers of one match set
    var matchSets = search(queryRegexStrs, codeStr, endOfLineStr);
    
    
    
    
    
    /**
     * Finds ordered sets of code lines that match a sequence of query regexes.
     *
     * The code string is split into lines on endOfLineStr. A match set is complete
     * once lines matching query 0, query 1, ... query N-1 have been seen in that
     * order; any number of lines that match none of the queries may lie in between.
     *
     * Matching rules:
     *   - each line is classified by the first query (in query order) that it matches,
     *     so a line is recorded for at most one query and a single line can never
     *     satisfy several consecutive queries on its own;
     *   - a line matching the first query always (re)starts a match set at that line;
     *   - a line whose first matching query is not the one expected next discards
     *     the partially built match set;
     *   - the regexes are compiled without flags, so matching is case-sensitive.
     *
     * @param {string[]} queryRegexStrs - regex source strings, one per sought line, in order
     * @param {string} codeStr - the code to search
     * @param {string} endOfLineStr - the separator used to split codeStr into lines
     * @returns {number[][]} one array of 0-based line numbers per complete match set
     */
    function search(queryRegexStrs, codeStr, endOfLineStr) {

      // break the large code string into an array of line strings
      var codeLines = codeStr.split(endOfLineStr);

      // remember the number of lines being sought
      var numQueryLines = queryRegexStrs.length;

      // convert the input regex strings into actual regexes in a parallel array
      var queryRegexs = queryRegexStrs.map(function(queryRegexStr) {
        return new RegExp(queryRegexStr);
      });

      // holds every complete match set found (there may be several per file)
      var matchSets = [];

      // line numbers collected so far for the match set currently being built
      var currMatchSet = [];

      // index of the query that has to be matched next
      var idxOfCurrQuery = 0;

      // start looking again for the first query and forget any partial match set
      var resetCurrQuery = function() {
        idxOfCurrQuery = 0;
        currMatchSet = [];
      };

      // index of the first query regex that matches the line, or -1 if none does;
      //   stopping at the first hit is what keeps one line from being recorded
      //   for several queries
      var findFirstMatchingQuery = function(codeLine) {
        for (var regexNum = 0; regexNum < numQueryLines; regexNum += 1) {
          if (queryRegexs[regexNum].test(codeLine)) {
            return regexNum;
          }
        }
        return -1;
      };

      // check each line of code
      codeLines.forEach(function(codeLine, codeLineNum) {

        var regexNum = findFirstMatchingQuery(codeLine);

        // lines that match no query neither advance nor interrupt a match set
        if (regexNum === -1) {
          return;
        }

        // a match of the first query always starts a fresh match set
        if (regexNum === 0) {
          resetCurrQuery();
        }

        if (regexNum === idxOfCurrQuery) {

          // the expected query was matched: record the line and move on
          currMatchSet.push(codeLineNum);
          idxOfCurrQuery += 1;

          // a whole query set has been found: keep it and start over
          if (idxOfCurrQuery === numQueryLines) {
            matchSets.push(currMatchSet);
            resetCurrQuery();
          }

        } else {

          // an out-of-sequence match interrupts the current match set
          resetCurrQuery();
        }
      });

      return matchSets;

    }
    
    
    
    
    // report the results
    document.write("<b>The code lines being sought:</b>");
    document.write("<pre>" + JSON.stringify(queryRegexStrs, null, 2) + "</pre>");
    document.write("<b>The code being searched:</b>");
    document.write(
      "<pre><ol start='0'><li>" +
      codeStr.replace(new RegExp("\n", "g"), "</li><li>") +
      "</li></ol></pre>"
    );
    document.write("<b>The code line numbers of query 'hits', grouped by query set:</b>");
    document.write("<pre>" + JSON.stringify(matchSets) + "</pre>");
    document.write("<b>One possible formatted output:</b>");
    
    var str = "<p>(Note that line numbers are 0-based...easily changed to 1-based if desired)</p>";
    str += "<pre>";
    matchSets.forEach(function(set, setNum, arr) {
      str += "Matching code block #" + (setNum + 1) + ": lines " + set[0] + "-" + set[set.length - 1] + "<br />";
    });
    str += "</pre>";
    document.write(str);