python · web-scraping · beautifulsoup · urllib3

Scrape table html multipage with beautifulsoup4 and urllib3


Help me please — the code I wrote only works for one page, but I want it to scrape all the pages. What should I do?

import csv 
import urllib3
from bs4 import BeautifulSoup


# Scrape the paginated product table from ciumi.com and append every row to data.csv.
# NOTE(review): the original indentation was inconsistent (SyntaxError as pasted),
# and the table extraction / CSV writing had fallen OUTSIDE the page loop, so at
# best only the last fetched page would ever be written. Everything page-specific
# must live inside the loop body.

# One connection pool reused for every request (the original rebuilt it per page).
http = urllib3.PoolManager()

# 'with' guarantees data.csv is flushed and closed even if a request raises.
with open("data.csv", "w", newline='') as outfile:
    writer = csv.writer(outfile)
    for page in range(1, 20):  # pages 1..19 — TODO confirm the real last page
        url = f'http://ciumi.com/cspos/barcode-ritel.php?page={page}'
        res = http.request('GET', url)
        tree = BeautifulSoup(res.data, 'html.parser')
        table_tag = tree.select("table")[0]  # first <table> on the page
        # One inner list per <tr>, one string per <th>/<td> cell.
        tab_data = [[item.text for item in row_data.select("th,td")]
                    for row_data in table_tag.select("tr")]
        for data in tab_data:
            writer.writerow(data)
            print(res, url, ' '.join(data))

Solution

  • Your code is nearly working. If you want to scrape all the URLs and collect data from every page, you just have to indent it correctly:

    import csv
    import urllib3
    from bs4 import BeautifulSoup


    # Scrape the paginated table from ciumi.com and write every row to data.csv.

    # One pool manager reused for every request instead of re-creating it per page.
    http = urllib3.PoolManager()

    # 'with' flushes and closes data.csv even if a request raises mid-loop —
    # the original never called outfile.close().
    with open("data.csv", "w", newline='') as outfile:
        writer = csv.writer(outfile)
        for i in range(1, 20):  # pages 1..19 — TODO confirm the real upper bound
            url = f'http://ciumi.com/cspos/barcode-ritel.php?page={i}'
            res = http.request('GET', url)
            tree = BeautifulSoup(res.data, 'html.parser')
            table_tag = tree.select("table")[0]  # first <table> on the page
            # .strip() trims stray whitespace/newlines around each cell's text,
            # producing a cleaner CSV (the "data cleaning" the answer mentions).
            tab_data = [[item.text.strip() for item in row_data.select("th,td")]
                        for row_data in table_tag.select("tr")]
            for data in tab_data:
                writer.writerow(data)
                print(res, url, ' '.join(data))
    

    But you will still have to clean the data to produce a tidy CSV file.