javascript jquery textselection rangy

Creating, setting styles on a range


I am trying to auto-detect addresses on a page and add the class "address" where found.

/**
 * For every match of `pattern` in `element.innerText`, builds a DOM Range over
 * the element's child nodes, adds that Range to the window selection and
 * toggles a rangy class applier created from `style` on the selection.
 *
 * @param {HTMLElement} element - Element whose innerText is searched and whose
 *   child nodes are walked to position the range boundaries.
 * @param {RegExp} pattern - Passed to String.prototype.match; the loop treats
 *   every entry of the result as one hit (assumes the regex carries the `g`
 *   flag — TODO confirm at call sites).
 * @param {string} style - First argument to rangy.createClassApplier
 *   (presumably the CSS class name — confirm against the rangy documentation).
 * @returns {undefined} Nothing is returned; the function works through side
 *   effects on the selection/DOM and console logging.
 */
var rangyPatternApplier = function(element, pattern, style) {
  var innerText = element.innerText;
  var matches = innerText.match(pattern);
  if (matches) {
    for (var i = 0; i < matches.length; i++) {
      console.log("Match: " + matches[i]);
      // Offset of the FIRST occurrence of this match text in innerText, so
      // identical matches later in the text resolve to the same offsets.
      var start = innerText.indexOf(matches[i]);
      var end = start + matches[i].length;

      let range = document.createRange();
      // start/end are recomputed below with the same expressions as above
      // (redundant redeclaration); the logging in between is diagnostic only.
      var start = innerText.indexOf(matches[i]);
      console.log('inner text: ' + innerText);
      console.log('start: ' + start);
      console.log('starts with: ' + innerText.substring(start));
      var end = start + matches[i].length;
      // The walk begins at the element's first child node.
      var startNode = element.childNodes[0];
      var endNode = startNode;
      // Skip whole child nodes until `start` falls inside the current one,
      // rebasing start/end so they are relative to that node.
      // NOTE(review): the offsets come from innerText but are consumed using
      // nodeValue lengths; the two can differ in whitespace, which would shift
      // the resulting range. nodeValue is also null on element nodes, so
      // `.length` would throw if a child here is an element — verify.
      while (startNode.nodeValue.length < start) {
        start -= startNode.nodeValue.length;
        end -= startNode.nodeValue.length;
        startNode = startNode.nextSibling;
        endNode = startNode;
        if (startNode == null) {
          // `error` is not defined in this block — presumably a project-level
          // reporter; verify it exists wherever this function runs.
          error.reportError("Just wrong in Sections.rangyPatternApplier");
          return;
        }
      }
      // Same walk for the end boundary, continuing from the start node.
      while (endNode.nodeValue.length < end) {
        end -= endNode.nodeValue.length;
        // When there is no next sibling, endNode stays on the same node.
        if (endNode.nextSibling) endNode = endNode.nextSibling;
        // Descend through first children while the node has no (or an empty)
        // nodeValue, e.g. when it is an element.
        // NOTE(review): if childNodes[0] is undefined this condition throws
        // before the null check below can run.
        while (!endNode.nodeValue) {
          endNode = endNode.childNodes[0];
        }
        if (endNode == null) {
          // Unlike the start-node branch, this one reports without returning.
          error.reportError("Just wrong in Sections.rangyPatternApplier");
        }
      } 
      range.setStart(startNode, start);
      console.log("starts with: " + startNode.nodeValue.substring(start));
      range.setEnd(endNode, end);

          // Class applier configured with a span element tag and no extra
          // element properties; a new one is created for every match.
          var applier  = rangy.createClassApplier(style, {
                    elementTagName: "span",
                    elementProperties: {
                    },
          });
      // The range is added to the current window selection (existing ranges
      // are not cleared here) and the applier is toggled on that selection.
      window.getSelection().addRange(range);
      applier.toggleSelection();
    }
  }
}

Called via:

  // Street-address pattern: "<number> <word> <word-or-number> <suffix>",
  // e.g. "109 East 16th Street".
  //  - [a-z] with the `i` flag covers both cases; the former [A-z] range also
  //    matched the punctuation between "Z" and "a" ([ \ ] ^ _ and backtick).
  //  - "Ave" is listed before "Av" so the longer suffix is matched in full
  //    instead of being cut short at "Av".
  //  - The `m` flag is dropped: the pattern has no ^ or $ anchors.
  // Built once and reused; String.prototype.match resets lastIndex on a
  // global regex, so sharing it across paragraphs is safe.
  const addressPattern = /\d+\s[a-z]+\s[a-z0-9]+\s(Street|St|Avenue|Ave|Av|Road|Rd)/gi;
  $("P").each(function () {
    rangyPatternApplier(this, addressPattern, "Address");
  });

On text in a paragraph:

If the income renders the household ineligible for CA/CILOCA, the case will be systemically referred to the Administration for Children's Services Transitional Child Care Unit at 109 East 16th Street 3rd floor for evaluation of Transitional Child Care (TCC) benefits. The TCC Worker determines eligibility for up to 12 months of TCC benefits.

The regex is working, the address class is being applied. I am applying the range to the window selection because there appears to be a bug in rangy when applied just on the Range (I'm getting an error message). But somehow, when I create the range, the span appears 5 characters before the start of the address and ends 9 characters early. The early ending part could be due to the tag around the "th" in 16th street. But why is the range 5 characters earlier than what I'm finding in innerText?


Solution

  • Sheesh this was a pain but I got it working. Adding my solution here so hopefully at least a few people don't have to go through doing something that should be much more "built-in", in my opinion

    //nextTextNode is for getting the next text node from the DOM
    
    /**
     * Returns the next text node in document order.
     *
     * - Given an element (nodeType 1): the first text node inside it or, when
     *   it contains none (e.g. a <br>), the first text node that follows it.
     * - Given a text node (nodeType 3): the first text node that follows it.
     *
     * Nodes met along the way that are neither text nor contain text (empty
     * elements, comments, ...) are stepped over instead of causing an error.
     *
     * @param {Node} node - Element or text node to start from.
     * @returns {Node|null} The next text node, or null when the walk reaches
     *   the top of the tree without finding one.
     * @throws {Error} If node is null/undefined or is not of node type 1 or 3.
     */
    function nextTextNode(node) {
      const ELEMENT_NODE = 1;
      const TEXT_NODE = 3;
      if (!node || (node.nodeType !== ELEMENT_NODE && node.nodeType !== TEXT_NODE)) {
        throw new Error("nextTextNode: Node is either null, not connected to the DOM, or is not of node type 1 or 3");
      }

      let current = node;
      // An element's own subtree is searched first; a starting text node has
      // no subtree, so the walk begins with whatever follows it.
      let mayDescend = node.nodeType === ELEMENT_NODE;
      for (;;) {
        if (mayDescend && current.firstChild) {
          current = current.firstChild;
        } else {
          // Nothing (left) below: climb until an ancestor has a next sibling.
          while (!current.nextSibling) {
            current = current.parentNode;
            if (!current) return null;
          }
          current = current.nextSibling;
        }
        if (current.nodeType === TEXT_NODE) return current;
        // Any node reached during the walk may be descended into.
        mayDescend = true;
      }
    }
    

    And then create the range. Text nodes contain extra newline and space characters compared to element.innerText. In the function below I track both the number of extra characters and the total number of characters seen, to account for the inconsistencies between innerText and node.nodeValue and to know how many characters "in" the current position is.

    /**
     * Builds a DOM Range covering the first occurrence of `text` in
     * `startElement.innerText`.
     *
     * innerText is rendered text, while the Range has to be expressed in text
     * node offsets. The two are aligned character by character:
     *  - `extras` counts text-node characters that have no counterpart in
     *    innerText (collapsed whitespace); it is decremented for innerText
     *    whitespace that has no counterpart in the text nodes (e.g. the line
     *    break produced by a <br>).
     *  - `totalCharsSeen` is the combined length of the text nodes already
     *    left behind.
     * The offset of innerText index i in the current text node is therefore
     * `i + extras - totalCharsSeen`.
     *
     * Limitations: non-whitespace characters are assumed to appear in the same
     * order in innerText and in the text nodes, so hidden text or CSS
     * text-transform can still throw the alignment off.
     *
     * @param {HTMLElement} startElement - Element whose innerText is searched.
     * @param {string} text - Text to locate; an empty string yields a
     *   collapsed range at the start of the first text node.
     * @returns {Range} Range spanning the located text.
     * @throws {Error} If `text` is not in innerText, if no text node is
     *   available, or if the text nodes run out before the match is aligned.
     */
    function createRangeForString(startElement, text) {
      const innerText = startElement.innerText;
      const start = innerText.indexOf(text);
      if (start === -1) throw new Error ("createRangeForString. text: " + text + " not found in startElement");
      const end = start + text.length;

      // Whitespace the renderer may collapse or drop. U+00A0 (non-breaking
      // space) is deliberately excluded: it is never collapsed.
      const isCollapsible = (ch) => ch === " " || ch === "\n" || ch === "\t" || ch === "\r" || ch === "\f";

      let textNode = nextTextNode(startElement);
      if (!textNode) throw new Error("createRangeForString. startElement has no text node to anchor the range on");

      const range = document.createRange();
      if (end === start) {
        range.setStart(textNode, 0);
        range.setEnd(textNode, 0);
        return range;
      }

      let extras = 0;
      let totalCharsSeen = 0;
      for (let i = 0; i < end; i++) {
        const rendered = innerText.charAt(i);
        // Whether innerText[i] corresponds to an actual text-node character.
        let consumed = false;
        for (;;) {
          const offset = i + extras - totalCharsSeen;
          if (offset >= textNode.nodeValue.length) {
            // Current node exhausted (or empty): move on to the next one.
            const following = nextTextNode(textNode);
            if (!following) {
              // Rendered whitespace may be synthetic and need no DOM character.
              if (isCollapsible(rendered)) break;
              throw new Error("createRangeForString. text: " + text + " could not be aligned with the text nodes of startElement");
            }
            totalCharsSeen += textNode.nodeValue.length;
            textNode = following;
            continue;
          }
          const raw = textNode.nodeValue.charAt(offset);
          if (isCollapsible(rendered)) {
            // Any DOM whitespace character can render as this whitespace;
            // otherwise the rendered whitespace is synthetic.
            consumed = isCollapsible(raw);
            break;
          }
          if (!isCollapsible(raw)) {
            consumed = true;
            break;
          }
          // DOM whitespace that was collapsed away in innerText.
          extras++;
        }
        if (i === start) range.setStart(textNode, i + extras - totalCharsSeen);
        // A synthetic character uses up no DOM character, so the next
        // innerText index maps to the same offset.
        if (!consumed) extras--;
        if (i === end - 1) range.setEnd(textNode, i + 1 + extras - totalCharsSeen);
      }
      return range;
    }