php jquery laravel web-scraping goutte

Extracting a specific attribute from the node returned by a Goutte request


I am using the Laravel Goutte package to perform some web scraping. The following code works and returns a lot of data, but I am trying to filter out only the piece of data I require.

If I load the page in the browser (whilst injecting jQuery into the page), I am able to get the data I need by running the following in the console: jQuery('ea-proclub-overview')[0]; - I am basically trying to do the equivalent of this command within the Laravel/Goutte instance below.

Using jQuery('ea-proclub-overview')[0].customCrestBaseUrl; in the console I get the exact URL I need - https://fifa21.content.easports.com/fifa/fltOnlineAssets/05772199-716f-417d-9fe0-988fa9899c4d/2021/fifaweb/crests/256x256/l

Below is my PHP code - I am getting the element back in the $node variable, but I am unsure how to return only the customCrestBaseUrl so that it gives me the URL.

// Create the Goutte client that will perform the HTTP request.
$client = new Client();
// Present a desktop Firefox user agent via the HTTP_USER_AGENT server parameter.
$client->setServerParameter('HTTP_USER_AGENT', 'Mozilla/5.0 (X11; Linux i686; rv:78.0) Gecko/20100101 Firefox/78.0');
// NOTE(review): unlike the user agent above, this key has no HTTP_ prefix —
// confirm whether the client actually sends it as a Referer header.
$client->setServerParameter('REFERER', 'https://www.ea.com/');

// Pro Clubs overview page to scrape (clubId and platform are fixed in the query string).
$url = 'https://www.ea.com/en-gb/games/fifa/pro-clubs/ps5-xbsxs/overview?clubId=2552&platform=ps5';
$crawler = $client->request('GET', $url);
// Select every <ea-proclub-overview> element and dump each matching node.
// NOTE(review): dd() presumably stops execution after the first dump — confirm.
$crawler->filter('ea-proclub-overview')->each(function ($node) {
  dd($node);
});  

Expected Result

<ea-proclub-overview endpoints="{&quot;settingsEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/settings&quot;,&quot;seasonalStatsEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/clubs/seasonalStats&quot;,&quot;clubsInfoEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/clubs/info&quot;,&quot;matchesEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/clubs/matches&quot;,&quot;memberStatEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/members/stats&quot;,&quot;memberCareerStatEndpoint&quot;:&quot;https://proclubs.ea.com/api/fifa/members/career/stats&quot;}" colors="{&quot;currentDivision&quot;:{&quot;startColor&quot;:&quot;#FA4358&quot;,&quot;endColor&quot;:&quot;#FA4358&quot;},&quot;nextDivision&quot;:{&quot;relegationColor&quot;:&quot;#FA4358&quot;,&quot;pointsColor&quot;:&quot;#FA4358&quot;},&quot;pieChart&quot;:{&quot;winsColor&quot;:&quot;#19A863&quot;,&quot;lossesColor&quot;:&quot;#C4010D&quot;,&quot;tiesColor&quot;:&quot;#282D3B&quot;},&quot;stats&quot;:{&quot;wins&quot;:{&quot;startColor&quot;:&quot;#19A863&quot;,&quot;endColor&quot;:&quot;#94D85D&quot;},&quot;losses&quot;:{&quot;startColor&quot;:&quot;#C4010D&quot;,&quot;endColor&quot;:&quot;#F80245&quot;},&quot;ties&quot;:{&quot;startColor&quot;:&quot;#282D3B&quot;,&quot;endColor&quot;:&quot;#282D3B&quot;}}}" match-type="[&quot;gameType9&quot;,&quot;gameType13&quot;]" headers-labels="{&quot;points&quot;:&quot;Points&quot;,&quot;stats&quot;:{&quot;wins&quot;:{&quot;label&quot;:&quot;Wins&quot;,&quot;description&quot;:&quot;Wins&quot;},&quot;losses&quot;:{&quot;label&quot;:&quot;Losses&quot;,&quot;description&quot;:&quot;Losses&quot;},&quot;ties&quot;:{&quot;label&quot;:&quot;Draws&quot;,&quot;description&quot;:&quot;Draws&quot;}}}" division-labels="{&quot;title&quot;:&quot;Division Ranking&quot;,&quot;currentDivisionTitle&quot;:&quot;Current Division&quot;,&quot;nextDivisionTitle&quot;:&quot;Points To Next 
Division&quot;,&quot;seasons&quot;:&quot;Season&quot;,&quot;record&quot;:&quot;Record&quot;,&quot;points&quot;:&quot;Points&quot;,&quot;gamesPlayed&quot;:&quot;Games Played&quot;,&quot;gamesRemaining&quot;:&quot;Games Remaining&quot;,&quot;divisionImgBaseUrl&quot;:&quot;https://media.contentapi.ea.com/content/dam/eacom/fifa/pro-clubs/divisioncrest&quot;,&quot;stats&quot;:{&quot;wins&quot;:&quot;W&quot;,&quot;losses&quot;:&quot;L&quot;,&quot;ties&quot;:&quot;D&quot;}}" progressbar-labels="{&quot;div&quot;:&quot;Div&quot;,&quot;promotion&quot;:&quot;Promotion&quot;,&quot;relegation&quot;:&quot;Relegation&quot;,&quot;title&quot;:&quot;Title&quot;}" members-labels="{&quot;title&quot;:&quot;Members&quot;,&quot;linkText&quot;:&quot;View All Members&quot;,&quot;linkUrl&quot;:&quot;members&quot;,&quot;totalTitle&quot;:&quot;Total Members&quot;,&quot;totalCountsLabel&quot;:&quot;Total&quot;,&quot;memberDetails&quot;:{&quot;proOverall&quot;:&quot;Overall Rating&quot;,&quot;ratingAve&quot;:&quot;Average Match Rating&quot;,&quot;gamesPlayed&quot;:&quot;Games Played&quot;},&quot;memberPosition&quot;:{&quot;defender&quot;:&quot;Defender&quot;,&quot;forward&quot;:&quot;Forward&quot;,&quot;goalkeeper&quot;:&quot;Goalkeeper&quot;,&quot;midfielder&quot;:&quot;Midfielder&quot;},&quot;positions&quot;:{&quot;defender&quot;:&quot;Defenders&quot;,&quot;forward&quot;:&quot;Forwards&quot;,&quot;goalkeeper&quot;:&quot;Goalkeepers&quot;,&quot;midfielder&quot;:&quot;Midfielders&quot;},&quot;defaultMemberAvatar&quot;:&quot;https://media.contentapi.ea.com/content/dam/ea/fifa/fifa-21/pro-clubs/common/pro-clubs/avatar.png&quot;}" match-labels="{&quot;title&quot;:&quot;Last Match&quot;,&quot;linkText&quot;:&quot;View All Match History&quot;,&quot;linkUrl&quot;:&quot;match-history&quot;,&quot;altTitle&quot;:&quot;No match data was found&quot;}" trophies-labels="{&quot;title&quot;:&quot;Trophies&quot;,&quot;cupsLabel&quot;:{&quot;leaguesWon&quot;:&quot;Leagues 
Won&quot;,&quot;titlesWon&quot;:&quot;Titles Won&quot;,&quot;totalCupsWon&quot;:&quot;Total Cups Won&quot;},&quot;cupsImg&quot;:{&quot;leaguesWonImgUrl&quot;:&quot;https://media.contentapi.ea.com/content/dam/ea/fifa/fifa-21/pro-clubs/common/pro-clubs/league-titles-21.png&quot;,&quot;titlesWonImgUrl&quot;:&quot;https://media.contentapi.ea.com/content/dam/ea/fifa/fifa-21/pro-clubs/common/pro-clubs/all-tiles-21.png&quot;,&quot;totalCupsWonImgUrl&quot;:&quot;https://media.contentapi.ea.com/content/dam/ea/fifa/fifa-21/pro-clubs/common/pro-clubs/cups-won-21.png&quot;}}" history-labels="{&quot;title&quot;:&quot;Club History&quot;,&quot;subTitle&quot;:&quot;Overall Record&quot;,&quot;pts&quot;:&quot;Points&quot;,&quot;division&quot;:&quot;Division&quot;,&quot;historyDetails&quot;:{&quot;seasons&quot;:&quot;Seasons Played&quot;,&quot;totalGames&quot;:&quot;Total Games&quot;,&quot;titlesWon&quot;:&quot;Titles Won&quot;,&quot;bestPoints&quot;:&quot;Highest Points Total&quot;,&quot;promotions&quot;:&quot;Promotions&quot;,&quot;relegations&quot;:&quot;Relegations&quot;},&quot;stats&quot;:{&quot;wins&quot;:&quot;Wins&quot;,&quot;losses&quot;:&quot;Losses&quot;,&quot;ties&quot;:&quot;Draws&quot;},&quot;statsShort&quot;:{&quot;wins&quot;:&quot;W&quot;,&quot;losses&quot;:&quot;L&quot;,&quot;ties&quot;:&quot;D&quot;},&quot;progressBar&quot;:{&quot;title&quot;:&quot;Best Season Finish&quot;,&quot;tipLabel&quot;:&quot;DIV&quot;,&quot;startColor&quot;:&quot;#9B7801&quot;,&quot;endColor&quot;:&quot;#F9F1A5&quot;,&quot;divisionBaseUrl&quot;:&quot;https://media.contentapi.ea.com/content/dam/eacom/fifa/pro-clubs/divisioncrest&quot;}}" translations="{&quot;4543827&quot;:&quot;East Coast US&quot;,&quot;5723475&quot;:&quot;West Coast US&quot;,&quot;5719381&quot;:&quot;Western Europe&quot;,&quot;4539733&quot;:&quot;Eastern Europe&quot;,&quot;5129557&quot;:&quot;Northern Europe&quot;,&quot;5457237&quot;:&quot;Southern Europe&quot;,&quot;4344147&quot;:&quot;British 
Isles&quot;,&quot;5456205&quot;:&quot;South America&quot;,&quot;4407629&quot;:&quot;Central America&quot;,&quot;4281153&quot;:&quot;Asia&quot;,&quot;4281683&quot;:&quot;Australia / New Zealand&quot;}" crest-base-url="https://fifa21.content.easports.com/fifa/fltOnlineAssets/05772199-716f-417d-9fe0-988fa9899c4d/2021/fifaweb/crests/256x256/l" custom-crest-base-url="https://fifa21.content.easports.com/fifa/fltOnlineAssets/05772199-716f-417d-9fe0-988fa9899c4d/2021/fifaweb/crests/256x256/l" default-crest-url="https://media.contentapi.ea.com/content/dam/ea/fifa/fifa-21/pro-clubs/common/pro-clubs/crest-default.png" loading-image="https://media.contentapi.ea.com/content/dam/eacom/fifa/pro-clubs/loading-animation.png" default-club-name="Disbanded"></ea-proclub-overview>

Actual Result

-- Too much to post, but it is all of the HTML & XML

Below is pastebin of the entire response from the $crawler when dumped out using dd(). https://pastebin.com/qxUTpu9p


Solution

  • According to the documentation:

    // extract() returns an array of values (one entry per matched node), not a
    // string, so read the attribute directly from the first matched element.
    $overview = $crawler->filter('ea-proclub-overview');

    // attr() throws on an empty node list, so fall back to null when the
    // <ea-proclub-overview> element is not present in the response.
    $customCrestBaseUrl = $overview->count() > 0
        ? $overview->attr('custom-crest-base-url')
        : null;