javascript perl xpath html-treebuilder

Perl: find a varying element id using HTML::TreeBuilder


I am trying to use a website's built-in search function to collect data from it, but I can't work out how to press the 'search' button: it has some JavaScript wrapped around it, and its id changes with each new iteration of the page.

Data for the section of the site is as below.

<html>
 <head>
 </head>
 <body>
  <table>
   <tr>
    <td>
    <td>
     <table>
      <tr>
       <td>
        <!-- start of toolbar Main -->
        <table>
         <tr>
          <td>
           <table>
            <tr class="buttonPad">
            </tr>
            <tr>
   *          <td nowrap="true" valign="top" class="button"><a id="S7674" accesskey="S" class="button" title="SEARCH" onclick="dispatch('S7674');"><u>S</u>></td>
            </tr>
           </table>
          </td>
          <td</td>
         </tr>
        </table>
      </td>
      </tr>
     </table>
    </td>
    </td>
   </tr>
  </table>
 </body>
</html>

and my code

   # Asker's attempt: build an XPath-capable tree, look for the search link,
   # and print the id of every node found.
   # NOTE(review): the argument is named $url; if it holds an address rather
   # than the page's markup, the parser is handed the address text itself,
   # not the page -- confirm what $url contains.
   my $tree= HTML::TreeBuilder::XPath->new;
      $tree->parse($url);

   # NOTE(review): this path includes "tbody" steps, a misspelt "tbosy" step,
   # "table.buttonSpace" and "a.button". The sample page shown in the question
   # has no tbody elements and no buttonSpace class, its <a> sits inside a
   # <td> rather than directly under a <tr>, and "tag.class" is CSS selector
   # notation rather than an XPath class test -- presumably why no nodes match.
   my @nodes = $tree->findnodes('/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr/td/table.buttonSpace/tbosy/tr/a.button')->get_nodelist; # line is modified later.
   my $nodecount = scalar(@nodes);

   # Report whether the query matched anything at all.
   if ($nodecount > 0 ) { print "we found something\n"; }
   else { print "nothing found\n"; } 

   # Print each matched node followed by the value extracted from it.
   foreach my $node (@nodes)
   {
      print "node is $node\n";
      # NOTE(review): 'button' is evaluated as a path relative to $node; the
      # id attribute would presumably be selected with '@id' -- verify.
      my $id = $node->findvalue('button');
      print "my id is $id\n";
   }

Sadly my code doesn't return any node values.

Many thanks in advance.

Micro


Solution

  • This seems to work:

    use strict;
    use warnings;
    use HTML::TreeBuilder;
    use Data::Dumper;
    
    # Sample page markup from the question, held in memory so the lookup can
    # be tried without fetching anything. The heredoc is the interpolating
    # form; the text below contains no '$' or '@', so nothing is substituted.
    my $html = <<HTML;
    <html>
     <head>
     </head>
     <body>
      <table>
       <tr>
        <td>
        <td>
         <table>
          <tr>
           <td>
            <!-- start of toolbar Main -->
            <table>
             <tr>
              <td>
               <table>
                <tr class="buttonPad">
                </tr>
                <tr>
                <td nowrap="true" valign="top" class="button"><a id="S7674" accesskey="S" class="button" title="SEARCH" onclick="dispatch('S7674');"><u>S</u>></td>
                </tr>
               </table>
              </td>
              <td</td>
             </tr>
            </table>
          </td>
          </tr>
         </table>
        </td>
        </td>
       </tr>
      </table>
     </body>
    </html>
    HTML
    
    # Parse the sample markup held in $html into an element tree.
    my $tree = HTML::TreeBuilder->new_from_content($html);

    # Match on tag, class and title rather than on the id, which the question
    # says changes with each new iteration of the page.
    my @search_links = $tree->look_down(
        _tag  => 'a',
        class => 'button',
        title => 'SEARCH',
    );

    # Dump the id attribute of every matching link.
    for my $link (@search_links) {
        print Dumper( $link->attr('id') );
    }