Below is the output of alert( rd.innerHTML ):
<ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」<Name des Sprechers><br><ruby><rb>です</rb><rt><br>desu</rt></ruby>。
I would like to iterate over all of those nodes. I tried (among other things):
/* Show the markup of every descendant element of rd, one alert per element.
   getElementsByTagName("*") returns elements only, so bare text nodes that
   sit between the elements are never visited by this loop. */
var items = rd.getElementsByTagName("*");
var i = 0;
while (i < items.length) {
  var item = items[i];
  alert( item.innerHTML);
  i += 1;
}
But with the loop above I only get:
<rb>「わたし</rb><rt><br>watashi</rt>
「わたし
\n watashi
<rb>は</rb><rt><br>wa</rt>
は
\n wa
<rb>です</rb><rt><br>desu</rt>
です
\n desu
How can I loop over this HTML and also get the node 」<Name des Sprechers><br>?
Ideally I would like to have all rb
elements and the text node in this order:
<rb>「わたし</rb><rt><br>watashi</rt>
<rb>は</rb><rt><br>wa</rt>
」<Name des Sprechers><br>
<rb>です</rb><rt><br>desu</rt>
I need this to convert single-sided furigana produced by Anki to double-sided furigana in case there is a <br>
node within the furigana - as explained here.
The rd.innerHTML
is the furigana produced by Anki. As @Pointy wrote, it...
...is not a single fragment; there are top-level elements (relative to the fragment) but no single enclosing tag.
Here is a card template including my setupCard()
function after fixing it using @it-goldman's solution:
<div class="jp" id="fldReading"> {{furigana:Reading}} </div><br>
<hr id=answer>
{{Meaning}}
<script language="javascript">
/* Element with id="fldReading" (the div in this template that holds the
   {{furigana:Reading}} field); setupCard() reads and rewrites its content. */
var rd = document.getElementById('fldReading');
/*
 * Use nested markup for double-sided ruby positioning
 * https://w3c.github.io/i18n-drafts/articles/ruby/styling.en#double_position
 *
 * setupCard() rewrites the content of the global `rd` element in place.
 * It takes no arguments and returns nothing.
 *
 * Input: the direct children of `rd` - <ruby> elements (each with an <rb>
 * base and one or more <rt> annotations), <br> elements and text nodes.
 *
 * Output: one <div class="jp"> per <br>-separated line. Within a line, each
 * run of consecutive <ruby> elements becomes ONE outer <ruby> that holds,
 * for every source ruby:
 *     <ruby>base<rt>first annotation</rt></ruby>
 *     <rt>second annotation</rt><rb></rb>   (only if the <rt> contained a <br>)
 * Text nodes are copied between those outer rubies, so the original document
 * order of text and ruby is preserved.
 *
 * Elements other than <ruby> and <br>, and <ruby> elements without an <rb>,
 * are skipped. Lines that end up with no content are not emitted.
 */
function setupCard() {
  /* Nothing to transform if the reading field is missing from the template */
  if (!rd) {
    return;
  }

  /* Create the container for one <br>-separated line */
  function createLineDiv() {
    const lineDiv = document.createElement('div');
    lineDiv.className = 'jp';
    return lineDiv;
  }

  /* Return the text of an <rt>, split at its <br> elements (and at literal
     newlines, as before) */
  function splitAnnotation(rt) {
    /* Work on a copy and read textContent instead of innerHTML, so that
       entities such as &amp; are decoded once rather than shown literally */
    const copy = rt.cloneNode(true);
    copy.querySelectorAll('br').forEach((br) => {
      br.replaceWith('\n');
    });
    return copy.textContent.split('\n');
  }

  /* Append one source ruby (base text plus annotations) to the outer ruby */
  function appendSegment(outerRuby, sourceRuby, rb) {
    /* Inner ruby: base text with the first (hiragana) annotation */
    const innerRuby = document.createElement('ruby');
    innerRuby.appendChild(document.createTextNode(rb.textContent));
    outerRuby.appendChild(innerRuby);

    sourceRuby.querySelectorAll('rt').forEach((rt) => {
      const parts = splitAnnotation(rt);

      const firstRt = document.createElement('rt');
      firstRt.appendChild(document.createTextNode(parts[0]));
      innerRuby.appendChild(firstRt);

      /* Is there a second annotation (after the <br>)? */
      if (parts[1]) {
        /* The second (romaji) annotation belongs to the outer ruby ... */
        const secondRt = document.createElement('rt');
        secondRt.appendChild(document.createTextNode(parts[1]));
        outerRuby.appendChild(secondRt);
        /* ... followed by an empty rb that separates it from the next segment */
        outerRuby.appendChild(document.createElement('rb'));
      }
    });
  }

  /* Attach the line to the result once it has content (at most once) */
  function commitLine(container, lineDiv) {
    if (lineDiv.parentNode !== container) {
      container.appendChild(lineDiv);
    }
  }

  const TEXT_NODE = 3;
  /* Holds the transformed structure until it replaces the content of rd */
  const newDiv = document.createElement('div');
  let lineDiv = createLineDiv();
  /* Outer ruby currently receiving segments; null means the next <ruby>
     must open a new one so that it lands after any preceding text */
  let outerRuby = null;

  for (let child = rd.firstChild; child; child = child.nextSibling) {
    if (child.nodeType === TEXT_NODE) {
      lineDiv.appendChild(document.createTextNode(child.textContent));
      commitLine(newDiv, lineDiv);
      /* Close the current group: a following ruby must come after this text */
      outerRuby = null;
    } else if (child.tagName === 'RUBY') {
      const rb = child.querySelector('rb');
      if (rb) {
        /* Create the outer ruby lazily, so no empty <ruby> is left behind */
        if (!outerRuby) {
          outerRuby = document.createElement('ruby');
          lineDiv.appendChild(outerRuby);
        }
        appendSegment(outerRuby, child, rb);
        commitLine(newDiv, lineDiv);
      }
    } else if (child.tagName === 'BR') {
      /* A top-level <br> starts a new line */
      lineDiv = createLineDiv();
      outerRuby = null;
    }
  }

  /* Replace jp div content by the transformed HTML */
  rd.innerHTML = newDiv.innerHTML;
}
/* Run the transformation once, as soon as this script is executed */
setupCard();
</script>
Where did Sprechers
disappear in your output? Well, you need to traverse the nodes
rather than the elements.
I'm re-using a function to extract text nodes from a node / element.
// Sample markup from the question, parsed into a detached <div> so that it
// can be walked with the DOM API.
// NOTE(review): `<Name des Sprechers>` is not escaped in this string, so
// assigning it to innerHTML will presumably parse it as an element rather
// than as text - confirm whether `&lt;Name des Sprechers&gt;` was intended.
var html = `<ruby><rb>「わたし</rb><rt><br>watashi</rt></ruby><ruby><rb>は</rb><rt><br>wa</rt></ruby>」<Name des Sprechers><br><ruby><rb>です</rb><rt><br>desu</rt></ruby>`
var rd = document.createElement("div");
rd.innerHTML = html;
/**
 * Gather the content of every text node found below `node`.
 *
 * Children are visited in document order and element children are descended
 * into recursively. Each text node that is found is also reported on the
 * console together with the tag name of its parent element.
 *
 * @param {Node} node - Root whose descendants are searched.
 * @returns {string[]} The textContent of each text node, in document order.
 */
function textUnder(node) {
  const TEXT_NODE = 3;
  const collected = [];
  let current = node.firstChild;
  while (current) {
    // Loose comparison kept on purpose: it mirrors the original check exactly.
    if (current.nodeType == TEXT_NODE) {
      console.log(`text node of ${current.parentElement.tagName}: ${current.textContent}`);
      collected.push(current.textContent);
    } else {
      for (const nestedText of textUnder(current)) {
        collected.push(nestedText);
      }
    }
    current = current.nextSibling;
  }
  return collected;
}
// Collect the content of every text node below rd and print the resulting array
var texts = textUnder(rd)
console.log(texts)
/* Make the .as-console-wrapper element at least as tall as its container.
   NOTE(review): presumably this is the console panel of the snippet runner
   the answer was posted with - confirm. */
.as-console-wrapper {
min-height: 100%;
}