For many years I extracted sound from translate.google.com using the Network tab of Chrome DevTools. When I clicked the sound button on the page, an mp3 file appeared in the Network tab. I just clicked that entry and downloaded the file for educational purposes.
Now there are only XHR-type requests whose responses contain countless character sequences inside square brackets, like strings in JS arrays. And the page itself is quite complicated.
How can I extract sound under the new circumstances?
PHP code that does this:
<?php
/*
 * Requests synthesized speech for $text in language $lang from the Google
 * Translate "batchexecute" endpoint and saves the audio to test.mp3 in the
 * current working directory.
 *
 * On any failure (payload encoding, transport, HTTP status, missing or
 * malformed audio, file write) a line starting with "error" and naming the
 * cause is printed instead.
 *
 * NOTE(review): the endpoint, the 'jQ1olc' RPC id and the response layout
 * are taken as-is from this script, not from a published API — they may
 * change without notice.
 */
$lang = 'en';
$text = 'hello world';

try {
    // Inner RPC arguments; the endpoint expects them as a JSON string nested
    // inside the outer JSON envelope, hence the double encoding.
    $rpcArgs = json_encode([
        $text,
        $lang,
        null,
        json_encode(null),
    ]);
    if ($rpcArgs === false) {
        // json_encode() returns false e.g. when $text is not valid UTF-8.
        throw new RuntimeException('cannot encode request arguments: ' . json_last_error_msg());
    }

    $envelope = json_encode([
        [
            [
                'jQ1olc',
                $rpcArgs,
                null,
                'generic',
            ],
        ],
    ]);
    if ($envelope === false) {
        throw new RuntimeException('cannot encode request envelope: ' . json_last_error_msg());
    }

    $curl = curl_init();
    if ($curl === false) {
        throw new RuntimeException('cannot initialise cURL');
    }

    curl_setopt_array($curl, [
        CURLOPT_URL => 'https://translate.google.com/_/TranslateWebserverUi/data/batchexecute',
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => http_build_query([
            'f.req' => $envelope,
        ]),
        // Without timeouts a stalled connection would block the script forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
    ]);

    $response = curl_exec($curl);
    // Collect diagnostics before the handle is released.
    $transportError = curl_error($curl);
    $httpStatus = (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    if ($response === false) {
        throw new RuntimeException('request failed: ' . $transportError);
    }
    if ($httpStatus !== 200) {
        throw new RuntimeException('unexpected HTTP status ' . $httpStatus);
    }

    // The audio is located by its leading base64 characters: "//NE" decodes
    // to the bytes FF F3 44 (presumably the start of an MP3 frame header).
    // The match runs up to the first backslash, i.e. the escaped quote that
    // closes the embedded string.
    if (!preg_match('#//NE[^\\\\]+#', $response, $matches)) {
        throw new RuntimeException('no audio data found in response');
    }

    // Strict mode rejects characters outside the base64 alphabet instead of
    // silently dropping them and producing a corrupt file.
    $audio = base64_decode($matches[0], true);
    if ($audio === false || $audio === '') {
        throw new RuntimeException('audio data is not valid base64');
    }

    if (file_put_contents('test.mp3', $audio) === false) {
        throw new RuntimeException('cannot write test.mp3');
    }
} catch (RuntimeException $e) {
    echo 'error: ' . $e->getMessage() . "\n";
}