I am trying to build a crawler with chrome-remote-interface, but I don't know how to get multiple DOM elements, such as elements targeted by specific ids or classes.
For example:
price = document.getElementById('price')
name = document.getElementById('name')
Code
const CDP = require('chrome-remote-interface');

// Connect to Chrome, load a page and print the outerHTML of its <body>.
CDP((client) => {
    // Extract used DevTools domains.
    const {Page, Runtime} = client;

    // Log a failed protocol call and release the connection. Without a
    // rejection handler the failure would go unhandled and client.close()
    // would never be reached.
    const abort = (err) => {
        console.error('Protocol error:', err);
        client.close();
    };

    // Enable events on domains we are interested in.
    Promise.all([
        Page.enable()
    ]).then(() => {
        return Page.navigate({url: 'http://example.com'});
    }).catch(abort);

    // Evaluate outerHTML after page has loaded.
    Page.loadEventFired(() => {
        Runtime.evaluate({expression: 'document.body.outerHTML'}).then((result) => {
            //How to get Multiple Dom elements
            console.log(result.result.value);
            client.close();
        }).catch(abort);
    });
}).on('error', (err) => {
    console.error('Cannot connect to browser:', err);
});
Update
// Attempt to fetch every element with class 'rows' from the loaded page
// through Runtime.evaluate and log what comes back.
const CDP = require('chrome-remote-interface');
CDP((client) => {
// Extract used DevTools domains.
// (DOM is destructured here but not used anywhere below.)
const {DOM,Page, Runtime} = client;
// Enable events on domains we are interested in.
Promise.all([
Page.enable()
]).then(() => {
return Page.navigate({url: 'https://someDomain.com'});
})
// Once the load event fires, evaluate the expression inside the page.
Page.loadEventFired(() => {
// NOTE: the second line of this expression has one '(' but two ')',
// so the text sent to the page is not valid JavaScript.
const expression = `({
test: document.getElementsByClassName('rows')),
})`
Runtime.evaluate({expression,returnByValue: true}).then((result) => {
console.log(result.result) // Error: logs a SyntaxError object, not the elements
client.close()
})
})
}).on('error', (err) => {
console.error('Cannot connect to browser:', err);
});
Error
{ type: 'object',
subtype: 'error',
className: 'SyntaxError',
description: 'SyntaxError: Unexpected token )',
objectId: '{"injectedScriptId":14,"id":1}' }
Actually, I want to iterate over the list of elements, but I don't know where it goes wrong.
You cannot move a DOM object from the browser context to the Node.js context; all you can do is pass a property, or anything else that can be serialized as a JSON object. Here I'm assuming you're interested in the computed HTML.
A possible solution is:
const CDP = require('chrome-remote-interface');

// Connect to Chrome, load a page and print the outerHTML of the elements
// whose ids are 'name' and 'price'.
CDP((client) => {
    // Extract used DevTools domains.
    const {Page, Runtime} = client;

    // Enable events on domains we are interested in.
    Promise.all([
        Page.enable()
    ]).then(() => {
        return Page.navigate({url: 'http://example.com'});
    }).catch((err) => {
        // Navigation never started, so the load handler below will not run;
        // report the failure and release the connection here.
        console.error(err);
        client.close();
    });

    // Evaluate outerHTML after page has loaded.
    Page.loadEventFired(() => {
        // This expression runs inside the page. It builds a plain object of
        // strings because only JSON-serializable values can be returned.
        const expression = `({
name: document.getElementById('name').outerHTML,
price: document.getElementById('price').outerHTML
})`;
        Runtime.evaluate({
            expression,
            returnByValue: true
        }).then(({result, exceptionDetails}) => {
            // If the expression threw in the page (e.g. an id is missing and
            // getElementById returned null), `result` describes the error and
            // has no `value`; surface that instead of failing on destructuring.
            if (exceptionDetails) {
                throw new Error(result.description || exceptionDetails.text);
            }
            const {name, price} = result.value;
            console.log(`name: ${name}`);
            console.log(`price: ${price}`);
        }).catch((err) => {
            console.error(err);
        }).then(() => {
            // Runs after success and after a handled failure alike.
            client.close();
        });
    });
}).on('error', (err) => {
    console.error('Cannot connect to browser:', err);
});
The key point is returning a JSON object using `returnByValue: true`.
Update: You have an error in your expression: an extra trailing `)` in `...('rows')),`. But even if you fix it, you'd still end up in the wrong situation, because you're attempting to pass an array of DOM objects (see the first paragraph of this answer). Again, if you just want the outer HTML, you can do something like:
// Evaluate outerHTML after page has loaded.
Page.loadEventFired(() => {
    // This script runs inside the page. It maps each <p> element to its
    // outerHTML string so that a plain array of strings is returned.
    // Comments inside the script must not contain backticks: an unescaped
    // backtick would terminate this template literal early.
    const expression = `
// fetch an array-like of DOM elements
var elements = document.getElementsByTagName('p');
// create and return an array containing
// just a property (in this case 'outerHTML')
Array.prototype.map.call(elements, x => x.outerHTML);
`;
    Runtime.evaluate({
        expression,
        returnByValue: true
    }).then(({result, exceptionDetails}) => {
        // If the script threw in the page, `result` describes the error and
        // has no `value`; report that instead of failing on forEach.
        if (exceptionDetails) {
            throw new Error(result.description || exceptionDetails.text);
        }
        // this is the returned array
        const elements = result.value;
        elements.forEach((html) => {
            console.log(`- ${html}`);
        });
    }).catch((err) => {
        console.error(err);
    }).then(() => {
        // Runs after success and after a handled failure alike.
        client.close();
    });
});