python, selenium-webdriver, web-scraping

Web scraping dynamic tables with pagination


I need help scraping the dynamic table on this website: rootdata

This table spans multiple pages. I need to pull the first 200 rows of the table with 'No token' selected as the filter (image example).

I would appreciate some guidance.

I am using Selenium, but I am not yet proficient with it.


Solution

  • from selenium import webdriver
    from selenium.webdriver.firefox.service import Service
    from selenium.webdriver.common.by import By
    import time
    
    # Scrape the fundraising table on rootdata.com with the last "Token" filter
    # radio (expected to be "No token") selected, printing each visible row.
    #
    # NOTE: only the rows of the currently displayed table page are printed;
    # stepping through further pages is not implemented here.
    from selenium.common.exceptions import (
        ElementClickInterceptedException,
        ElementNotInteractableException,
        StaleElementReferenceException,
    )

    gecko_path = 'geckodriver.exe'
    service = Service(gecko_path)

    # Upper bound, in seconds, on polling for the "Token" filter header so the
    # script fails loudly instead of spinning forever if the page layout changes.
    token_header_timeout = 30

    # Errors that are expected when clicking buttons of a dialog that may
    # already be closing: the element can be hidden, covered, or detached.
    dismiss_errors = (
        ElementClickInterceptedException,
        ElementNotInteractableException,
        StaleElementReferenceException,
    )

    driver = webdriver.Firefox(service=service)
    try:
        driver.get('https://www.rootdata.com/Fundraising')

        time.sleep(5)

        title = driver.title
        print(f"The title of the page is: {title}")

        # Close any dialog by clicking every button it contains. This is
        # best-effort: once one click closes the dialog, the remaining buttons
        # may no longer be clickable, which must not abort the script.
        for dialog in driver.find_elements(By.CLASS_NAME, 'v-dialog__content'):
            try:
                buttons = dialog.find_elements(By.TAG_NAME, 'button')
            except StaleElementReferenceException:
                continue
            for button in buttons:
                try:
                    button.click()
                except dismiss_errors:
                    continue
        time.sleep(1)

        # Wait for the collapsible filter group whose header starts with
        # "Token", polling once per second until the deadline passes.
        deadline = time.monotonic() + token_header_timeout
        token = None
        while token is None:
            for header in driver.find_elements(By.CLASS_NAME, 'el-collapse-item__header'):
                print(header.text)
                if header.text.startswith('Token'):
                    token = header
            if token is not None:
                break
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"'Token' filter header not found within {token_header_timeout} seconds"
                )
            print('No token found, retrying...')
            time.sleep(1)

        # Expand the token group unless it is already marked active.
        # get_attribute() can return None when the attribute is absent.
        cls = token.get_attribute('class') or ''
        print(cls)
        if 'is-active' not in cls:
            token.click()
        time.sleep(1)

        # Click the last radio button (no token).
        radios = driver.find_elements(By.CLASS_NAME, 'el-radio')
        if not radios:
            raise RuntimeError("No 'el-radio' filter options found on the page")
        radios[-1].click()
        # Fixed pause so the table can re-render with the filter applied before
        # it is read; the delay is a heuristic, not a guarantee.
        time.sleep(2)

        # Print the rows of the (first) table body on the page.
        tbody = driver.find_element(By.TAG_NAME, 'tbody')
        for tr in tbody.find_elements(By.TAG_NAME, 'tr'):
            print(tr.text)
            print('\n')
    finally:
        # Always shut down the browser and the geckodriver process.
        driver.quit()
        print('\n')