I want to get the content of the 'src' attribute of every image on a page of my website. I can do this with the following simple code:
// Scrape the 'src' attribute of every <img> element on the page and print
// the resulting list to the shell.
var casperModule = require('casper');
var xpath = casperModule.selectXPath;
var casper = casperModule.create({
    logLevel: "debug",
    verbose: true
});

// Step callback run once the start URL has loaded; CasperJS invokes step
// callbacks with `this` bound to the casper instance.
function printImageSources() {
    var imageSelector = xpath('//img');
    var urls = this.getElementsAttribute(imageSelector, 'src');
    console.log(urls); // the collected URLs are displayed in the shell
}

casper.start('http://mylocalwebsite.com', printImageSources);
casper.run();
However, I want to disable JavaScript when my script runs, so I modified my initial code as follows:
// Same scraping script as before, but with JavaScript turned off for the
// loaded page through pageSettings.javascriptEnabled.
// Reported behavior: nothing is printed after the getElementsAttribute call.
var x = require('casper').selectXPath;
var casper = require('casper').create({
verbose: true,
logLevel: "debug",
pageSettings: {
// Disable JavaScript execution in the loaded page.
javascriptEnabled: false,
},
});
casper.start('http://mylocalwebsite.com', function() {
// Collect the 'src' attribute of every <img> element matched by the XPath.
// NOTE(review): presumably this call needs to run code inside the page,
// which would explain why it fails when JavaScript is disabled - confirm.
var urls = casper.getElementsAttribute(x('//img'), 'src');
console.log(urls); // this log does not appear in the shell
});
casper.run();
(property 'javascriptEnabled' set to false)
But if I do that, my script stops working right after the getElementsAttribute line.
As I understand it, I can't use a CasperJS selector when JavaScript is disabled. Is that right? Is it not possible to disable JavaScript and still scrape elements from my page?
Is there a way to get the 'src' attribute of all images from a script in which JavaScript is disabled?
Thank you !
You can prevent the website's JavaScript files from being loaded (and therefore from running) by cancelling the requests for them:
// Create the casper instance with JavaScript left enabled, so CasperJS
// selectors keep working; the site's own script files are blocked below.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});

// Matches URLs whose path ends in ".js", optionally followed by a query
// string or fragment. Anchoring avoids false positives such as ".json",
// ".jsp" or host names like "cdn.jsdelivr.net".
var SCRIPT_URL_PATTERN = /\.js(?:[?#]|$)/i;

// Abort every request for a JavaScript file before it is sent.
// requestData describes the outgoing request (its `url` is read here);
// request is the network request object, which exposes abort().
casper.on('resource.requested', function(requestData, request) {
    if (SCRIPT_URL_PATTERN.test(requestData.url)) {
        casper.log("skipped " + requestData.url);
        // abort() is the documented way to drop a request here; the
        // request object has no documented cancel() method.
        request.abort();
    }
});
and keep JavaScript enabled for Casper itself.
Note that JavaScript files might not have a .js
suffix in their URL; if no resource files are required, you can also cancel all resource requests.