I read the documentation, and it looks like you need SlimerJS (http://docs.casperjs.org/en/latest/events-filters.html) in order to get `responseData.body` from the `page.resource.received` event.
My use case is to download the images while the page is loading, so that I don't need another round trip of: getting the resource JSON, downloading and saving each image file, then reloading the file to check the image dimensions and discarding it if it is too small (icons).
I am wondering if there is a better way to do this. I could in fact use `evaluate` with an `img` selector, but some websites use the `background-image: url(...)` CSS property instead, and that's tricky.
Evaluate can be tricky, but here is a possible approach (PhantomJS only):
With this example there is the possibility of:
- storing the `clipRects` of the images matching specific criteria, for a later `page.render()` of those elements into screenshot files;
- storing the URLs of the matching resources for later download requests;
- capturing the URL inside the 'src' attribute or the 'background-image' CSS property, with an attempt at also obtaining the width & height for criteria match-and-capture purposes.
/**
 * PhantomJS script: open a page, scan every element for an image reference
 * (either a CSS 'background-image' url(...) or an HTML 'src' attribute), and
 * collect the ones whose dimensions fall within the configured min/max bounds.
 *
 * For every captured image the script records its URL, its element index and
 * a clip rectangle ({top, left, width, height}) that can later be assigned to
 * page.clipRect before calling page.render().
 *
 * NOTE(review): the in-page code relies on the target page already exposing
 * jQuery as `$`; if it does not, nothing is captured and a message is logged.
 * ES5 syntax is kept on purpose, matching the original script — TODO confirm
 * whether the PhantomJS build in use supports newer syntax before modernizing.
 */
var page = require('webpage').create();

// Forward console output produced inside the page context to the PhantomJS console.
page.onConsoleMessage = function(msg) { console.log(msg); };

console.log('[#] I M A G E · N I N J A');

page.open('http://cartawifi.com', function(status) {
  // Bail out early when the page could not be loaded; there is nothing to scan.
  if (status !== 'success') {
    console.log('[x] Unable to load page : ' + status);
    phantom.exit(1);
    return;
  }

  // The evaluate callback runs inside the page; only JSON-serializable values
  // (here: an array of plain clip rectangles) can be returned from it.
  var clipRectList = page.evaluate(function() {
    if (typeof $ !== 'function') {
      console.log('[x] jQuery ($) is not available on this page');
      return [];
    }

    // .: Captured Images : In-Memory Reference Storage :.
    const IMAGES = {
      'src': {'indxs': [], 'ref': []},
      'background-image': {'indxs': [], 'ref': []},
      'selectors': []
    };
    var clipRects = []; // clip rectangles of the elements holding matching images
    var capturedImages = 0;
    var totalElements = 0;

    // .: Define Image Capture : Min/Max Width/Height :.
    const minWidth = 1;
    const minHeight = 1;
    const maxWidth = 9999;
    const maxHeight = 9999;

    // Extracts the content of the first url(...) token, with or without quotes.
    const urlPattern = /url\(\s*(['"]?)(.*?)\1\s*\)/;

    $('*').each(function(index, el) {
      var group;
      var src;
      var width;
      var height;

      totalElements++;

      var background = $(el).css('background-image');
      var urlMatch = background ? background.match(urlPattern) : null;

      if (urlMatch) {
        // element with a 'background-image' css property pointing at a url
        group = 'background-image';
        src = urlMatch[2];
        // parseFloat stops at the 'px' suffix of the computed value
        width = parseFloat($(el).css('width'));
        height = parseFloat($(el).css('height'));
      } else if ($(el).attr('src') != null) {
        // element with a 'src' html attribute; non-image elements have no
        // naturalWidth/naturalHeight and therefore fail the size check below
        group = 'src';
        src = $(el).attr('src');
        width = el.naturalWidth;
        height = el.naturalHeight;
      } else {
        return; // no image reference on this element: continue with the next one
      }

      //---------------------------------------------------------
      if (width >= minWidth && height >= minHeight &&
          width <= maxWidth && height <= maxHeight) {
        IMAGES[group].indxs.push(index);
        IMAGES[group].ref.push(src);
        IMAGES.selectors.push(el);
        capturedImages++;
        console.log(" [captured] :", group, width, height, src);
        //:store clipRect for this element
        // offsetWidth/offsetHeight live on the DOM element, not on the jQuery wrapper
        var clipR = $.extend({}, $(el).offset(), {width: el.offsetWidth, height: el.offsetHeight});
        console.log(" (clipRect)", JSON.stringify(clipR));
        clipRects.push(clipR);
      }
    });

    // report information :
    console.log('[i] Total Elements Parsed : ', totalElements);
    console.log('[*] Total Images Captured : ', capturedImages);
    console.log(' > [src] : ', IMAGES['src'].indxs.length);
    console.log(' > [background-image] : ', IMAGES['background-image'].indxs.length);

    return clipRects;
  }) || [];

  // clipRectList now holds one rectangle per captured image, ready for the TO-DO below.
  console.log('[!] TO-DO : STORE CAPTURED IMAGES AS FILES');
  phantom.exit();
});