javascript phantomjs casperjs

How to wait for page loading when using casperjs?


I am trying to scrape a webpage that has a form with many dropdowns, and the values in the form are interdependent. At many points I need the code to wait until the page has finished refreshing. E.g., after selecting an option from one list, the code should wait until the next list has been populated based on that selection. It would be really helpful if someone could give me some pointers because, strangely, my code only works after I added a lot of unnecessary logging statements, which in turn introduced some delay. Any suggestions to improve the code would be very helpful.

// Create the Casper instance used by every step below, with verbose
// debug-level logging and a custom user-agent string.
var casper = require('casper').create({
     verbose: true,
     logLevel: 'debug',
     userAgent: 'Mozilla/5.0  poi poi poi (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.172 Safari/537.22',
     pageSettings: {}
 });

 // First step: open the start page, fill in the member.php form with the
 // quick_username / quick_password fields, and take a screenshot.
 casper.start('http://www.abc.com', function () {
     console.log("casper started");
     // The trailing `true` is fill()'s submit flag (per the CasperJS API),
     // so the form is submitted right after the fields are set.
     this.fill('form[action="http://www.abc.com/forum/member.php"]', {
         quick_username: "qwe",
         quick_password: "qwe"
     }, true);
     // NOTE(review): this capture runs immediately after fill(); it may show
     // the page before the submission has completed - confirm.
     this.capture('screen.png');
 });
 // Step: open the search page, click the filter controls, set the
 // "status" and "state" fields, then call the page's own changeState().
 casper.thenOpen("http://www.abc.com/search/index.php").then(function () {
     this.click('input[type="checkbox"][name="firstparam"]');
     this.click('a#poi');

     this.evaluate(function () {
         document.getElementsByName("status")[0].value = 1;
         document.getElementsByName("state")[0].value = 1078;
         // changeState() is the function associated with the "state" dropdown;
         // the page reloads at this point. Only after the refresh completes
         // should the following code execute. How can this be achieved?
         changeState();
         return true;
     });
     // Logged immediately after the evaluate() call above returns.
     this.echo('Inside the first thenOpen' + this.evaluate(function () {
         return document.search.action;
     }));
 });
 // Step: tick the required checkboxes, then press the "add college" button,
 // logging the form action and taking screenshots along the way.
 casper.then(function () {
     this.capture("poi.png");
     console.log('just before injecting jquery');
     // Inject a local copy of jQuery so the evaluate() call below can use $.
     casper.page.injectJs('./jquery.js');
     this.click('input[type="checkbox"][name="or"]');
     // Inside the page: mark the checkbox with value 18127 as checked.
     this.evaluate(function () {
         $('.boxline .filelist input:checkbox[value=18127]').attr("checked", true);
     });
     this.echo('Just before pressing the add college button' + this.evaluate(function () {
         return document.search.action;
     }));
     this.capture('collegeticked.png');
     // NOTE(review): this existence check targets name="niv", but the click
     // further down targets name="poi" - confirm both refer to the same button.
     if (this.exists('input[type="button"][name="niv"]')) {
         this.echo('button is there');
     } else {
         this.echo('button is not there');
     }
     this.echo("Going to print return value");
     this.click('input[type="button"][name="poi"]'); // This click again causes a page refresh. Code should wait at this point for completion.
     // The two echoes and the capture below run right after the click, with
     // no explicit wait in between.
     this.echo('Immediately after pressing the add college btn getPresentState()' + this.evaluate(function () {
         return getPresentState();
     }));
     this.echo('Immediately after pressing add colleg button' + this.evaluate(function () {
         return document.search.action;
     }));
     this.capture('iu.png');
 });

 // Step: click the submit button of the form named "search", then log the
 // page state and form action and take a screenshot.
 casper.then(function () {
     console.log('just before form submit');
     this.click('form[name="search"] input[type="submit"]'); //Again page refresh. Wait.
     // Everything below runs right after the click, with no explicit wait.
     this.echo('Immediately after search btn getPresentState()' + this.evaluate(function () {
         return getPresentState();
     }));
     this.echo('Immediately after search button-action' + this.evaluate(function () {
         return document.search.action;
     }));
     this.capture("mnf.png");
 });

 // Step: on the results page, take a screenshot, report the page title and
 // log how many cells match the jQuery query below.
 casper.then(function () {
     // Inject a local copy of jQuery so $ is usable inside evaluate() below.
     casper.page.injectJs('./jquery.js');
     this.capture("resultspage.png");

     this.echo('Page title is: ' + this.evaluate(function () {
         return document.title;
     }), 'INFO');
     // evaluate() can only hand back serializable values, so a jQuery
     // collection of DOM nodes cannot cross from the page into this script.
     // Compute the count inside the page and return just that number.
     var matchCount = this.evaluate(function () {
         return $('tbody tr td.tdbottom:contains("tye") ').siblings().filter($('td>a').parent()).length;
     });
     console.log("ARBABU before" + matchCount);
 });

 // Start executing the steps queued above.
 casper.run();

Solution

  • I've been using the waitForSelector 'workaround' mentioned by Arun here: https://stackoverflow.com/a/22217657/1842033

    It's the best solution I've found; the 'drawback', as it were, is that you need to know which element you're expecting to load. I say 'drawback', but personally I don't think I've encountered a situation where I haven't had some kind of feedback indicating that whatever I'm waiting for has happened.

    // Wait for an element matching the selector ("{myElement}" is a
    // placeholder); `pass` is called when it is found, `fail` when the
    // timeout given as the last argument elapses first.
    this.waitForSelector("{myElement}",
        function pass () {
            test.pass("Found {myElement}");
        },
        function fail () {
            test.fail("Did not load element {myElement}");
        },
        20000 // timeout limit in milliseconds
    );
    

    Although I'd guess you could use waitForResource() or something like that if you didn't have visual feedback.