Hi everyone. I need to check a huge number of rows in a database via API calls. I managed to do that, but it is very slow.
Here are the steps my code performs:
1.) I select unique identifiers from my local database.
2.) I check them against the remote database via curl_multi (100 at a time).
3.) Depending on status code I update rows in my database.
4.) Repeat first 3 steps until all rows are checked.
How can I speed up this process? At the moment, checking 600 rows takes more than one minute.
// Checks unverified customers against the remote API in batches of up to 100
// parallel requests and records the outcome in database_table.
//
// Expects from the surrounding script:
//   $conn - an open mysqli connection
//   $data - iterable whose element count bounds the number of batches
//
// Status code -> value stored in `prigovor` (row is also marked provjeren = 1):
//   204 -> 1, 404 -> 0, 200 -> 2
// A 429 (rate limited) response only prints a notice; the row stays unchecked
// and is picked up again by a later batch. Any other status is ignored.

// One prepared statement is reused for every update: no SQL is built from
// string interpolation and the server parses the statement only once.
$prigovorByStatus = [204 => 1, 404 => 0, 200 => 2];
$prigovor = 0;
$customerId = '';
$update = $conn->prepare(
    "UPDATE database_table SET provjeren = 1, prigovor = ?, datum = now() WHERE Customer_ID = ?"
);
$update->bind_param('is', $prigovor, $customerId);

foreach ($data as $d) {
    // Only the identifier is needed; DISTINCT avoids requesting the same
    // customer twice within one batch.
    $result = $conn->query(
        "SELECT DISTINCT Customer_ID FROM database_table WHERE provjeren IS NULL LIMIT 100"
    );
    if ($result === false) {
        // Query failed; there is nothing sensible to do with this batch.
        break;
    }

    $ids = [];
    while ($row = mysqli_fetch_assoc($result)) {
        $ids[] = $row['Customer_ID'];
    }
    $result->free();

    if (count($ids) === 0) {
        // Nothing left to check.
        break;
    }

    // Build one easy handle per customer and register it with the multi handle.
    $master = curl_multi_init();
    $handles = [];
    foreach ($ids as $i => $id) {
        // rawurlencode() encodes every character that is unsafe in a URL path
        // segment, not only spaces.
        $handle = curl_init("someapiurl/" . rawurlencode($id));
        curl_setopt($handle, CURLOPT_HTTPHEADER, array("Authorization: Bearer"));
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_ENCODING, '');
        curl_multi_add_handle($master, $handle);
        $handles[$i] = $handle;
    }

    // Drive all transfers. curl_multi_select() blocks until there is socket
    // activity, so the loop does not spin at 100% CPU while waiting.
    do {
        $status = curl_multi_exec($master, $running);
        if ($running > 0 && curl_multi_select($master, 1.0) === -1) {
            // select() can fail immediately on some platforms; back off briefly.
            usleep(1000);
        }
    } while ($running > 0 && $status === CURLM_OK);

    foreach ($handles as $i => $handle) {
        $statusCode = curl_getinfo($handle, CURLINFO_RESPONSE_CODE);

        if (isset($prigovorByStatus[$statusCode])) {
            // Use the original identifier from the database, not one parsed
            // back out of the (URL-encoded, possibly redirected) request URL.
            $prigovor = $prigovorByStatus[$statusCode];
            $customerId = (string) $ids[$i];
            $update->execute();
        } elseif ($statusCode == 429) {
            echo "Previše";
        }

        curl_multi_remove_handle($master, $handle);
        curl_close($handle);
    }
    curl_multi_close($master);
}

$update->close();
EDIT: I tried Rolling-Curl and got the same results, so my conclusion is that the target server is slow and that is my bottleneck. I tested 500 requests: against my target server they took 36 s to complete, while against a Google server they took only 2 s.
Have you tried running the HTTP API calls in a separate thread?
Try this PHP project: https://github.com/petewarden/ParallelCurl
According to the docs you need to:
If it doesn't help, try to measure what takes the most time (DB fetching, API calls, DB update)