javascript html anki

Get all items of an HTML element


Below is the output of alert( rd.innerHTML );:

 <ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」&lt;Name des Sprechers&gt;<br><ruby><rb>です</rb><rt><br>desu</rt></ruby>。

I would like to iterate over all of those nodes. I tried (among other things):

// Collect every descendant element of rd. getElementsByTagName("*") matches
// elements only, so bare text nodes are not part of this list.
var items = rd.getElementsByTagName("*");
var i = 0;
// Show the markup of each element in turn.
while (i < items.length) {
    var item = items[i];
    alert( item.innerHTML);
    i += 1;
}

But the loop above only gives me the element nodes (ruby, rb, rt and br); the bare text between the elements is never returned.

How can I loop over this HTML and also get the node 」&lt;Name des Sprechers&gt;<br>?

Ideally I would like to have all rb elements and the text node in this order:

Background

I need this to convert single-sided furigana produced by Anki to double-sided furigana in case there is a <br> node within the furigana - as explained here.

The rd.innerHTML is the furigana produced by Anki. As @Pointy wrote, it...

...is not a single fragment; there are top-level elements (relative to the fragment) but no single enclosing tag.

Here is a card template including my setupCard() function after fixing it using @it-goldman 's solution:

<div class="jp" id="fldReading"> {{furigana:Reading}} </div><br>

<hr id=answer>

{{Meaning}}

<script language="javascript">

/* Element holding the furigana rendered by Anki (the #fldReading div of this template) */
var rd = document.getElementById('fldReading');

/*
 * Use nested markup for double-sided ruby positioning
 * https://w3c.github.io/i18n-drafts/articles/ruby/styling.en#double_position
 *
 * Rebuilds the content of the global `rd` element (the furigana rendered by
 * Anki) and writes the result back to rd.innerHTML:
 *   - every top-level <ruby> that has an <rb> becomes an inner <ruby> (base
 *     text plus the annotation found before the first <br> of each <rt>)
 *     placed inside a parent <ruby>; the annotation found after the <br>
 *     is attached to that parent <ruby>, followed by an empty <rb>;
 *   - consecutive rubies share one parent <ruby>; a text node ends the
 *     current parent so that later rubies stay behind the text;
 *   - every top-level <br> starts a new <div class="jp"> line;
 *   - top-level text nodes are copied through unchanged;
 *   - other top-level elements and rubies without an <rb> are ignored.
 *
 * Takes no arguments and returns nothing. Does nothing when `rd` is null.
 */
function setupCard() {
    const TEXT_NODE = 3;

    /* The reading element may be missing from the template: nothing to do */
    if (!rd) {
        return;
    }

    /* Holds the transformed structure until it is written back to rd */
    const newDiv = document.createElement('div');
    /* Line currently being filled and whether it is already part of newDiv */
    let lineDiv = null;
    let lineAttached = false;
    /* Parent ruby shared by consecutive rubies; null means "create on demand" */
    let parentRuby = null;

    /* Begin a new, still detached, output line */
    function startLine() {
        lineDiv = document.createElement('div');
        lineDiv.className = 'jp';
        lineAttached = false;
        parentRuby = null;
    }

    /* Add the current line to the output once it has received content */
    function attachLine() {
        if (!lineAttached) {
            newDiv.appendChild(lineDiv);
            lineAttached = true;
        }
    }

    /*
     * Text of a node with every <br> turned into '\n'. Reading text nodes
     * (instead of innerHTML) keeps entities such as &amp; decoded.
     */
    function plainText(node) {
        let text = '';
        for (let part = node.firstChild; part; part = part.nextSibling) {
            if (part.nodeType === TEXT_NODE) {
                text += part.textContent;
            } else if (part.tagName === 'BR') {
                text += '\n';
            } else {
                text += plainText(part);
            }
        }
        return text;
    }

    /* Convert one source <ruby> into the nested structure on the current line */
    function appendRuby(sourceRuby) {
        const rb = sourceRuby.querySelector('rb');
        if (!rb) {
            return;
        }

        /* Create the parent ruby only when it is needed, at the current position */
        if (!parentRuby) {
            parentRuby = document.createElement('ruby');
            lineDiv.appendChild(parentRuby);
        }

        /* Inner ruby carrying the base text */
        const innerRuby = document.createElement('ruby');
        innerRuby.appendChild(document.createTextNode(rb.textContent));
        parentRuby.appendChild(innerRuby);

        sourceRuby.querySelectorAll('rt').forEach((rt) => {
            /* Split the annotation at <br> */
            const parts = plainText(rt).split('\n');

            /* First part (hiragana) annotates the inner ruby */
            const innerRt = document.createElement('rt');
            innerRt.appendChild(document.createTextNode(parts[0]));
            innerRuby.appendChild(innerRt);

            /* Is there a second furigana (after <br>)? */
            if (parts[1]) {
                /* Second part (romaji) annotates the parent ruby */
                const outerRt = document.createElement('rt');
                outerRt.appendChild(document.createTextNode(parts[1]));
                parentRuby.appendChild(outerRt);
                /* Empty rb element (for nested markup) */
                parentRuby.appendChild(document.createElement('rb'));
            }
        });

        attachLine();
    }

    startLine();

    /* Iterate through all top-level nodes to transform the structure */
    for (let child = rd.firstChild; child; child = child.nextSibling) {
        if (child.nodeType === TEXT_NODE) {
            lineDiv.appendChild(document.createTextNode(child.textContent));
            attachLine();
            /* Rubies after this text need a new parent ruby behind the text */
            parentRuby = null;
        } else if (child.tagName === 'RUBY') {
            appendRuby(child);
        } else if (child.tagName === 'BR') {
            startLine();
        }
    }

    /* Replace jp div content by the transformed HTML */
    rd.innerHTML = newDiv.innerHTML;
}

/* Run the transformation as soon as this script is executed */
setupCard();

</script>

Solution

  • Where did Sprechers disappear in your output? Well, you need to traverse the nodes rather than the elements. I'm reusing a function that extracts the text nodes from a node / element.

    // Sample markup from the question, loaded into a detached <div> so it can be traversed
    var html = `<ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」&lt;Name des Sprechers&gt;<br><ruby><rb>です</rb><rt><br>desu</rt></ruby>`
    
    var rd = document.createElement("div");
    rd.innerHTML = html;
    
    
    /**
     * Collect the text of every text node found below `node`, in document order.
     * Each text node is also logged together with the tag name of its parent element.
     *
     * @param {Node} node - Root whose descendants are searched.
     * @returns {string[]} The textContent of each text node, depth-first.
     */
    function textUnder(node) {
      const collected = [];
      let current = node.firstChild;
      while (current) {
        if (current.nodeType === 3) {
          // nodeType 3 is a text node: record it
          console.log(`text node of ${current.parentElement.tagName}: ${current.textContent}`);
          collected.push(current.textContent);
        } else {
          // Any other node: descend and keep whatever text it contains
          collected.push(...textUnder(current));
        }
        current = current.nextSibling;
      }
      return collected;
    }
    
    // Gather the text of every text node below rd (including the bare text between the rubies) and print it
    var texts = textUnder(rd)
    console.log(texts)
    .as-console-wrapper {
      min-height: 100%;
    }