Per the question, how should one go about completely extracting the client-side HTML code from an encapsulated shadow DOM node which also contains further nested child shadow nodes?
For reference, these are other Stack Overflow questions I visited whose answers did not help: 1 2 3 4 5 6
I initially used the following code, but it failed to scrape any of the nested shadow DOMs:
/**
 * Serializes `element` to an HTML string, descending into shadow roots.
 *
 * Known limitation (this is the behaviour described as failing): a node is
 * only expanded when it is itself a shadow host. Any other node is emitted
 * through `nodeValue` / `outerHTML`, so shadow hosts nested deeper inside
 * such a node are never visited, and a host's own light-DOM children are
 * not included either.
 *
 * @param {Node} element - Node to start serializing from.
 * @returns {string} The collected markup.
 */
const getDeepShadowDomHtml = (element) => {
  const serialize = (node) => {
    // Not a shadow host: fall back to the node's text or its plain markup
    if (!node.shadowRoot) {
      return node.nodeValue || node.outerHTML || '';
    }

    const openTag = node.tagName.toLowerCase();
    const attributeText = Array.from(
      node.attributes,
      (attr) => ` ${attr.name}="${attr.value}"`,
    ).join('');
    // Only the shadow tree's children are walked here
    const shadowMarkup = Array.from(
      node.shadowRoot.childNodes,
      (child) => serialize(child),
    ).join('');
    const closeTag = node.tagName.toLowerCase();

    return `<${openTag}${attributeText}>${shadowMarkup}</${closeTag}>`;
  };

  return serialize(element);
};
/**
 * Follows a chain of nested shadow roots, one selector per level.
 *
 * `selector` is split on spaces; each piece is run with `querySelector`
 * inside the shadow root of the element found by the previous piece, so an
 * individual piece cannot itself contain a space (no descendant combinators).
 *
 * @param {Element|null} startElement - Outermost shadow host.
 * @param {string} selector - Space-separated selectors, one per shadow level.
 * @returns {Element|null} The element matched by the last selector, or
 *   `null` when any step has no match or no accessible shadow root.
 */
const findNestedShadowRoot = (startElement, selector) => {
  let element = startElement;
  const selectors = selector.split(' ').filter(Boolean);
  for (const sel of selectors) {
    // A missing element, or one without an accessible shadow root, ends the
    // walk with "not found" instead of throwing a TypeError.
    if (!element || !element.shadowRoot) {
      return null;
    }
    element = element.shadowRoot.querySelector(sel);
  }
  return element;
};
// Resolve the outer host first: document.querySelector returns null when the
// host is absent, and that case should reach the "not found" branch below.
const hostElement = document.querySelector('shadow-host-selector');
const behaviorTabElement = hostElement
  ? findNestedShadowRoot(hostElement, 'first-shadow-selector second-shadow-selector #behaviourtab')
  : null;
if (behaviorTabElement) {
  const shadowDomContent = getDeepShadowDomHtml(behaviorTabElement);
  console.log(shadowDomContent); // Verify content before download
  // Offer the markup as a file download through a temporary object URL
  const blob = new Blob([shadowDomContent], { type: 'text/html' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'behaviorTabShadowDOMContent.html';
  document.body.appendChild(a);
  a.click();
  // The link only exists to trigger the download; do not leave it in the page
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
} else {
  console.log("The #behaviourtab element was not found.");
}
The following code worked for me. Comments are included in the code.
// jQuery for good measure: injects a <script> tag that loads jQuery into the
// page. Nothing else in this snippet calls jQuery, so this step is optional.
// NOTE(review): the "jquery-latest" URL is believed to be pinned to an old
// release; confirm and prefer an explicit version if jQuery is needed.
const script = document.createElement('script');
script.src = 'https://code.jquery.com/jquery-latest.min.js';
document.head.appendChild(script);
/**
 * Serializes the children of `element` (typically a shadow root) to an HTML
 * string, expanding every nested shadow root that is reachable through
 * `node.shadowRoot` along the way.
 *
 * For each shadow host, the shadow tree's markup is written first and the
 * host's own (light DOM) children after it, both inside the host's tags.
 *
 * Serialization details:
 * - attribute values and text are escaped so the result re-parses correctly;
 * - text inside raw-text elements such as <style> and <script> is left as is;
 * - comment nodes are written as <!-- ... --> rather than as visible text;
 * - void elements such as <br> and <img> get no closing tag;
 * - <template> elements are serialized from their `content` fragment.
 *
 * @param {ShadowRoot|Element|DocumentFragment|null} element - Container whose
 *   child nodes are serialized.
 * @returns {string} The markup, or '' when `element` is null/undefined.
 */
const captureShadowDom = (element) => {
  const VOID_TAGS = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
    'input', 'link', 'meta', 'source', 'track', 'wbr',
  ]);
  // Elements whose text content must not be entity-escaped
  const RAW_TEXT_TAGS = new Set([
    'script', 'style', 'xmp', 'iframe', 'noembed', 'noframes', 'plaintext',
  ]);

  const escapeText = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/\u00A0/g, '&nbsp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  const escapeAttribute = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/\u00A0/g, '&nbsp;')
    .replace(/"/g, '&quot;');

  // Concatenates the serialization of every child of `parent`
  const serializeChildren = (parent, isRawText) => {
    let html = '';
    for (const child of parent.childNodes) {
      html += serializeNode(child, isRawText);
    }
    return html;
  };

  const serializeNode = (node, isRawText) => {
    if (node.nodeType === Node.ELEMENT_NODE) {
      const tag = node.tagName.toLowerCase();
      let html = `<${tag}`;
      // Captures element attributes
      for (const attr of node.attributes) {
        html += ` ${attr.name}="${escapeAttribute(attr.value)}"`;
      }
      html += '>';
      // A closing tag after a void element would be parsed as extra markup
      if (VOID_TAGS.has(tag)) {
        return html;
      }
      // Checks for shadow DOM to recursively capture its content
      if (node.shadowRoot) {
        html += serializeChildren(node.shadowRoot, false);
      }
      // A <template> keeps its children in `content`, not in childNodes
      const container = tag === 'template' && node.content ? node.content : node;
      html += serializeChildren(container, RAW_TEXT_TAGS.has(tag));
      return `${html}</${tag}>`;
    }

    const value = node.nodeValue || '';
    if (node.nodeType === Node.COMMENT_NODE) {
      return `<!--${value}-->`;
    }
    // Text or other node types
    return isRawText ? value : escapeText(value);
  };

  if (!element) {
    return '';
  }
  // Starts processing the root element
  return serializeChildren(element, false);
};
/**
 * Finds the further nested element by walking through nested shadow roots.
 *
 * `selector` is split on spaces and each piece is looked up with
 * `querySelector` inside the shadow root of the element matched by the
 * previous piece. Because of the split, a single piece cannot contain spaces.
 *
 * @param {Element|null} startElement - Outermost shadow host.
 * @param {string} selector - Space-separated selectors, one per shadow level.
 * @returns {Element|null} The element matched by the final selector, or
 *   `null` if a step matches nothing or its element has no accessible
 *   shadow root.
 */
const findNestedShadowRoot = (startElement, selector) => {
  let element = startElement;
  const selectors = selector.split(' ').filter(Boolean);
  for (const sel of selectors) {
    // Stop with "not found" rather than throwing when there is no shadow
    // root to descend into.
    if (!element || !element.shadowRoot) {
      return null;
    }
    element = element.shadowRoot.querySelector(sel);
  }
  return element;
};
// Update with the correct shadow DOM hierarchy.
// The outer host is resolved separately because document.querySelector
// returns null when it is absent; that case should end in the "not found"
// message below.
const shadowHost = document.querySelector('shadow-host-selector');
const behaviorTabElement = shadowHost
  ? findNestedShadowRoot(shadowHost, 'first-shadow-selector second-shadow-selector main-selector')
  : null;
console.log(behaviorTabElement); // Check if the element is found
if (behaviorTabElement && behaviorTabElement.shadowRoot) {
  const shadowDomContent = captureShadowDom(behaviorTabElement.shadowRoot);
  console.log(shadowDomContent); // Check if content is being collected
  // Offer the markup as a file download through a temporary object URL
  const blob = new Blob([shadowDomContent], {
    type: 'text/html'
  });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = 'ShadowDOMContent.html';
  document.body.appendChild(a);
  a.click();
  // The link only exists to trigger the download; do not leave it in the page
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
} else if (behaviorTabElement) {
  // Found, but there is nothing to serialize (no shadow root, or a closed one)
  console.log("The element has no accessible shadow root.");
} else {
  console.log("The element was not found.");
}
This may not be the most efficient way to do this. I am not a professional developer and only do this for fun, so if anyone has a better working method, feel free to improve on it!