javascript node.js phantomjs casperjs slimerjs

Download resource image files using casperjs


I read the documentation (http://docs.casperjs.org/en/latest/events-filters.html), and it looks like you need SlimerJS in order to get responseData.body from the page.resource.received event.

My use case is to download the images while the page is being loaded, so that I don't have to do another round trip of: getting the resource JSON, downloading and saving every image file, then reloading each file to check its dimensions and discarding it if it is too small (icons).

I am wondering if there is a better way to do this. I could in fact use evaluate with an img selector, but some websites set their images through CSS backgrounds (background-image: url(...)), and that's tricky.


Solution

  • Evaluate can be tricky, but here is a possible approach (PhantomJS only):

    With this example there is the possibility of:

    1. storing the clipRects of the images matching specific criteria, for a later page.render() of those elements into screenshot files.

    2. storing the URLs of the matching resources for later download requests

    3. capturing the URL inside the 'src' attribute or the 'background-image' CSS property, while also attempting to obtain the width & height so that size criteria can be applied.

    // PhantomJS script: loads a page, walks every element inside the page
    // context, and records the images it finds either as a CSS
    // 'background-image' url(...) or as an element with a 'src' attribute.
    // For each image whose size is within the configured bounds it stores the
    // URL and a clipRect ({top, left, width, height}); the clipRect list is
    // returned from page.evaluate() to the PhantomJS side.
    //
    // NOTE: `var` and function expressions are used on purpose; this runs in
    // PhantomJS, whose engine predates `let` and arrow functions.
    // NOTE(review): the in-page code relies on jQuery ($) being present on the
    // loaded page; when it is not, nothing is captured (see the guard below).
    var page = require('webpage').create();
    // Forward console output from the page context to the PhantomJS console.
    page.onConsoleMessage = function(msg) {console.log(msg);};
    console.log('[#] I M A G E · N I N J A');
    page.open('http://cartawifi.com', function(status) {
      // Nothing useful can be evaluated if the page did not load.
      if (status !== 'success') {
        console.log('[x] Failed to load page : ' + status);
        phantom.exit(1);
        return;
      }
      // Everything inside this function runs in the page's own context; only
      // the returned (JSON-serializable) clipRects cross back to PhantomJS.
      var clipRectList = page.evaluate(function() {
        if (typeof $ !== 'function' || !$.fn) {
            console.log('[x] jQuery is not available on this page');
            return [];
        }
        // .: Captured Images : In-Memory Reference Storage :.
        var IMAGES = {
            'src':{'indxs':[],'ref':[]},
            'background-image':{'indxs':[],'ref':[]},
            'selectors':[]
        };
        var clipRects = []; // maybe you want to take page screenshots of specific elements containing matching images
        var capturedImages = 0;
        var totalElements = 0;
        // .: Define Image Capture : Min/Max Width/Height :.
        var minWidth = 1;
        var minHeight = 1;
        var maxWidth = 9999;
        var maxHeight = 9999;
        // Extracts the first URL of a 'background-image' value, with or
        // without surrounding quotes: url(x), url('x') or url("x").
        var bgUrlPattern = /url\(\s*(['"]?)(.*?)\1\s*\)/;
        $('*').each(function(index, el) {
            totalElements++;
            var group;
            var src;
            var width;
            var height;
            var background = $(el).css('background-image');
            var bgMatch = background ? bgUrlPattern.exec(background) : null;
            if (bgMatch) {
                // element with a 'background-image' css property pointing at a url
                group = 'background-image';
                src = bgMatch[2];
                // parseFloat ignores the trailing 'px'; non-numeric values
                // such as 'auto' give NaN and fail the size check below.
                width = parseFloat($(el).css('width'));
                height = parseFloat($(el).css('height'));
            } else if ($(el).attr('src') != null) {
                // element with a 'src' html attribute; naturalWidth/Height are
                // only defined on images, so other elements fail the size check.
                group = 'src';
                src = $(el).attr('src');
                width = el.naturalWidth;
                height = el.naturalHeight;
            } else {
                return; // not an image candidate; continue with the next element
            }
            //---------------------------------------------------------
            if (!(width >= minWidth && width <= maxWidth &&
                  height >= minHeight && height <= maxHeight)) {
                return;
            }
            IMAGES[group].indxs.push(index);
            IMAGES[group].ref.push(src);
            IMAGES.selectors.push(el);
            capturedImages++;
            console.log("  [captured] :",group,width,height,src);
            //:store clipRect for this element (offsetWidth/offsetHeight are
            // properties of the DOM element, not of the jQuery wrapper)
            var clipR = $.extend({},$(el).offset(),{width: el.offsetWidth,height: el.offsetHeight});
            console.log("    (clipRect)",JSON.stringify(clipR));
            clipRects.push(clipR);
        });
        // report information :
        console.log('[i] Total Elements Parsed : ',totalElements);
        console.log('[*] Total Images Captured : ',capturedImages);
        console.log('     >              [src] : ',IMAGES['src'].indxs.length);
        console.log('     > [background-image] : ',IMAGES['background-image'].indxs.length);
        return clipRects;
      }) || [];
      // clipRectList holds one {top, left, width, height} entry per captured
      // image, ready to be assigned to page.clipRect before a page.render().
      console.log('[!] TO-DO : STORE CAPTURED IMAGES AS FILES');
      phantom.exit();
    });