phpsymfony4csv-import

How can I improve CSV import faster in Symfony?


I have to import CSV file of around 20,000 rows. This CSV is read from FTP Server which will update every 30 minutes. But the code I have written is already taking more than 45 mintues to import. This is very slow. Can anybody please help me.

foreach ($readers as $key => $row) {
    $totalRecords +=1;

    $filterArray = $this->entityManager->getRepository(Article::class)->findBy(['id' =>  $row['id']]);

    if (empty($filterArray)) {
        $notFoundRecords +=1;
        continue;
    }
    $foundRecords +=1;
    $this->processPriceRow($row);
}


protected function processPriceRow($row)
{
    $existingRecord = $this->entityManager
                           ->getRepository(WareHouse::class)
                           ->findBy(['id' => $row['product_id']]);

    if (empty($existingRecord)) {
        return $this->fillArticleWareHouse($row);
    }
}


protected function fillArticleWareHouse($row, $i, $batchSize)
{
    $newWareHouse = new WareHouse();
    ....
    ....
    ...

    // Insert.
    $this->entityManager->persist($newWareHouse);
    $this->entityManager->flush();
}

I am thinking of persist the data every based on the batchSize = 100. But as I have function inside function, I am not being able to implement that as well.


Solution

  • You can implement batch processing like this.

    
        protected $batchSize = 100;
        protected $i = 0;
        
        protected function processPriceRow($row)
        {
            $existingRecord = $this->entityManager
                ->getRepository(WareHouse::class)
                ->findBy(['id' => $row['product_id']]);
    
            if (empty($existingRecord)) {
                return $this->fillArticleWareHouse($row);
            }
            $this->entityManager->flush();
        }
    
        protected function fillArticleWareHouse($row)
        {
            $newWareHouse = new WareHouse();
            //....
            $this->entityManager->persist($newWareHouse);
            ++$this->i;
            if (($this->i % $this->batchSize) === 0) {
                $this->entityManager->flush();
            }
        }
    

    Also it would be better if you select all Article and WareHouse entities with one select and save them to array [entityId => Entity].

            // articles
            $rowIds = [];
            foreach ($readers as $key => $row) {
                $rowIds[] = $row['id'];
            }
            
            $articles = $this->entityManager->getRepository(Article::class)->findBy(['id' =>  $rowIds]);
            $articleIdToArticle = [];
            foreach ($articles as $article) {
                $articleIdToArticle[$article->getId()] = $article;
            }
            
            foreach ($readers as $key => $row) {
                $totalRecords +=1;
                if(!key_exists($row['id'], $articleIdToArticle)) {
                    $notFoundRecords +=1;
                    continue;
                }            
                $foundRecords +=1;
                $this->processPriceRow($row);
            }