javascript html jsdom

How to serialize HTML without head and body tags using jsdom


I want to clear some tags from an HTML document like so:

const { JSDOM } = require('jsdom');

/**
 * Replaces <em>, <b> and <u> elements that are direct children of an anchor
 * tag with plain text nodes carrying the same text content.
 *
 * @param pDom - A JSDOM instance, or an HTML string that is parsed into one.
 * @returns The output of `serialize()` with every `&nbsp;` entity replaced by
 *   a regular space. Note that `serialize()` yields a complete document
 *   (including <html>, <head> and <body>), not just a fragment.
 */
function clearAnchorTags(pDom: JSDOM | string): string {
    // Accept raw markup as well as an already-parsed document.
    const dom = typeof pDom === 'string' ? new JSDOM(pDom) : pDom;
    const doc = dom.window.document;

    // Node names of the formatting elements that get flattened to text.
    const flattenedTags = new Set(['EM', 'B', 'U']);

    // Get all anchor tags from dom
    for (const anchorTag of doc.querySelectorAll('a')) {
        // `children` is a live collection: replacing an element while
        // iterating it directly shifts the remaining entries and skips
        // siblings. Iterate over a snapshot instead.
        for (const child of Array.from(anchorTag.children)) {
            if (flattenedTags.has(child.nodeName)) {
                const node = doc.createTextNode(child.textContent ?? '');
                anchorTag.replaceChild(node, child);
            }
        }
    }

    // return as string just as we received it -- also replace &nbsp; with space
    return dom.serialize().replace(/&nbsp;/g, ' ');
}

The issue seems to be the serialize() method here at the end, as it returns a fully featured HTML document.

I need it without the <html>, <head> and <body> tags. I only need an HTML fragment.


Solution

  • I fixed my problem using

    return pDom.window.document.body.innerHTML.replace(/&nbsp;/g, ' ');