php dom domdocument domxpath

How do I parse an attribute of a node?


I'm trying to extract the href attribute from a link inside a span, but I'm getting the error: Uncaught Error: Call to a member function getElementsByTagName()

    // Question snippet, kept exactly as posted: it tries to collect the title,
    // link and price of every "link-to-flavors" block, and it reproduces the
    // reported "Call to a member function getElementsByTagName()" error.

    // Sample markup: two "link-to-flavors" blocks, each with a title span that
    // wraps an <a href="..."> and a separate price span.
    $sample_html = <<<HTML
<div class="link-to-flavors">
    <span class="flavor-banner"></span>
    <span class="flavor-space"></span>
    <span class="flavor-title"><span><a href="flavors.com/vanilla" class="flavor-link">Vanilla</a></span></span>
    <span class="flavor-price">$1.99</span>
    <span class="flavor-icons icons-inrow"></span>
</div>
<div class="link-to-flavors">
    <span class="flavor-banner"></span>
    <span class="flavor-space"></span>
    <span class="flavor-title"><span><a href="flavors.com/strawberry" class="flavor-link">Strawberry</a></span></span>
    <span class="flavor-price">$2.99</span>
    <span class="flavor-icons icons-inrow"></span>
</div>
HTML;

    // Create a DOMDocument.
    // The leading @ is PHP's error-control operator applied to this assignment.
    @$dom = new DOMDocument( );

    // Handle HTML5/svg errors: have libxml buffer its parse errors internally
    // instead of reporting them as PHP warnings.
    libxml_use_internal_errors( true );

    // Load HTML
    $dom->loadHTML( $sample_html );

    // Select every <div> whose class attribute is exactly "link-to-flavors".
    $xpath = new DOMXPath( $dom );
    $xpath_query = '//div[@class="link-to-flavors"]';
    $obj_div_flavors = $xpath->query( $xpath_query );

    // NOTE(review): DOMXPath::query() is documented to return false (not null)
    // on failure, so this guard looks like it cannot catch a failed query — confirm.
    if( !is_null($obj_div_flavors) ) {
        // One row (title, link, price) is appended per matched <div>.
        $result_array = array( );

        foreach( $obj_div_flavors as $obj_div_flavor_each ) {
            // All descendant <span> elements of this block, nested ones included.
            $obj_div_flavor_spans = $obj_div_flavor_each->getElementsByTagName( 'span' );

            // Reset per block so a missing span leaves the field as null.
            $flavor_title = null;
            $flavor_link = null;
            $flavor_price = null;

            foreach( $obj_div_flavor_spans as $obj_div_flavor_span_each ) {
                // Case-insensitive substring test on the span's class attribute.
                if( stripos($obj_div_flavor_span_each->getAttribute('class'), 'flavor-title') !== false ) {
                    $flavor_title = $obj_div_flavor_span_each->textContent;
                }

                // This is the line that raises the reported error:
                // getAttribute('class') returns a string, so the chained
                // ->getElementsByTagName('a') is invoked on a string instead of
                // on the span element. Even when called on the element,
                // getElementsByTagName() yields a DOMNodeList, which has no
                // getAttribute() method; a single node must be picked first.
                // The body also stores textContent rather than the href value.
                if( stripos($obj_div_flavor_span_each->getAttribute('class')->getElementsByTagName('a')->getAttribute('href'), 'flavor-title') !== false ) {
                    $flavor_link = $obj_div_flavor_span_each->textContent;
                }

                // Same substring test, this time for the price span.
                if( stripos($obj_div_flavor_span_each->getAttribute('class'), 'flavor-price') !== false ) {
                    $flavor_price = $obj_div_flavor_span_each->textContent;
                }
            }

            $result_array[] = array( 'flavor_title' => $flavor_title, 'flavor_link' => $flavor_link, 'flavor_price' => $flavor_price );
        }

        // Dump the collected rows.
        print_r( $result_array );
    }

Solution

    $sample_html = <<<HTML
    <div class="link-to-flavors">
        <span class="flavor-banner"></span>
        <span class="flavor-space"></span>
        <span class="flavor-title"><span><a href="flavors.com/vanilla" class="flavor-link">Vanilla</a></span></span>
        <span class="flavor-price">$1.99</span>
        <span class="flavor-icons icons-inrow"></span>
    </div>
    <div class="link-to-flavors">
        <span class="flavor-banner"></span>
        <span class="flavor-space"></span>
        <span class="flavor-title"><span><a href="flavors.com/strawberry" class="flavor-link">Strawberry</a></span></span>
        <span class="flavor-price">$2.99</span>
        <span class="flavor-icons icons-inrow"></span>
    </div>
    HTML;
    
        // Extract the title, link (href) and price of every "link-to-flavors"
        // block found in $sample_html, then print the collected rows.

        // Handle HTML5/svg errors: buffer libxml parse errors internally instead
        // of letting loadHTML() report them as PHP warnings. This must be enabled
        // before the markup is loaded.
        libxml_use_internal_errors( true );

        // Create a DOMDocument and load the HTML. No @ is needed here: the
        // constructor itself raises nothing that would have to be suppressed.
        $dom = new DOMDocument( );
        $dom->loadHTML( $sample_html );

        // The buffered parse errors are never inspected, so release them.
        libxml_clear_errors( );

        // Select every <div> whose class attribute is exactly "link-to-flavors".
        $xpath = new DOMXPath( $dom );
        $xpath_query = '//div[@class="link-to-flavors"]';
        $obj_div_flavors = $xpath->query( $xpath_query );

        // DOMXPath::query() signals failure with false, never null, so compare
        // against false to make the guard effective.
        if( $obj_div_flavors !== false ) {
            // One row (title, link, price) is appended per matched <div>.
            $result_array = array( );

            foreach( $obj_div_flavors as $obj_div_flavor_each ) {
                // All descendant <span> elements of this block, nested ones included.
                $obj_div_flavor_spans = $obj_div_flavor_each->getElementsByTagName( 'span' );

                // Reset per block so a missing span leaves the field as null.
                $flavor_title = null;
                $flavor_link = null;
                $flavor_price = null;

                foreach( $obj_div_flavor_spans as $obj_div_flavor_span_each ) {
                    // Read the class attribute once; it is tested twice below.
                    $span_class = $obj_div_flavor_span_each->getAttribute( 'class' );

                    if( stripos($span_class, 'flavor-title') !== false ) {
                        $flavor_title = $obj_div_flavor_span_each->textContent;

                        // getElementsByTagName() returns a DOMNodeList; take the
                        // first <a> inside the title span. item(0) is null when
                        // the span contains no link.
                        $flavor_link_element = $obj_div_flavor_span_each->getElementsByTagName( 'a' )->item( 0 );

                        if( $flavor_link_element !== null ) {
                            $flavor_link = $flavor_link_element->getAttribute( 'href' );
                        }
                    }

                    if( stripos($span_class, 'flavor-price') !== false ) {
                        $flavor_price = $obj_div_flavor_span_each->textContent;
                    }
                }

                $result_array[] = array( 'flavor_title' => $flavor_title, 'flavor_link' => $flavor_link, 'flavor_price' => $flavor_price );
            }

            // Dump the collected rows.
            print_r( $result_array );
        }