How to scrape a webpage whose URL is the same as the main page URL in Python?

The website URL below opens a form in which we only need to select a fiscal year and click Search to get the data for that year, but the search results are loaded at the same URL as the form:

https://cfpub.epa.gov/compliance/criminal_prosecution/index.cfm

I have written the piece of code below, manually hard-coding the XPath of the 2023 year option:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Asker's attempt: open the search form, locate the 2023 fiscal-year option,
# click Search, then read back the current URL.
# NOTE(review): `url` is never defined in this snippet — presumably it is the
# EPA page URL quoted above; as written this line raises NameError.
b = webdriver.Chrome()
b.get(url)
time.sleep(10)
# Absolute XPath to the 42nd <option> of a <select> inside the form (per the
# asker, this is the 2023 fiscal-year entry).
total_article_xpath = "//*[@id=\"main-content\"]/div[2]/div[1]/div/div/form/table/tbody/tr[8]/td/div/div[2]/select/option[42]"
# Waits up to 10 s for the option to be present in the DOM. This only locates
# the element; nothing in this snippet clicks or selects it.
element = WebDriverWait(b, 10).until(EC.presence_of_element_located((By.XPATH, total_article_xpath)))
time.sleep(10)
print(element)
# Re-runs the same XPath starting from `element`; the result is never used
# afterwards in this snippet.
getdetails = element.find_element(By.XPATH, total_article_xpath)
button_val = "//*[@id=\"searchButton\"]"
# Clicks the Search button. Because no option was selected above, the form is
# presumably submitted with its default fiscal-year value — TODO confirm.
b.find_element(By.XPATH, button_val).click()
print(b)
# URL of the page shown after the click; stored but not used further here.
vals = b.current_url

How can we navigate to and scrape the fiscal-year results page when its URL is the same as the main page URL? Any help would be appreciated.

Solution

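You need to make a selection from the Fiscal Year dropdown, then click the Search button. It does not matter that the URL stays the same: Selenium works with whatever page is currently loaded in the browser, so once the search has been submitted you can scrape the resulting page as normal.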

Here's an example:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions

# Page that hosts the search form (the results are served from the same URL).
URL = "https://cfpub.epa.gov/compliance/criminal_prosecution/index.cfm"
# Absolute XPath of the fiscal-year <select> inside the search form.
SELECT_XPATH = "//*[@id='main-content']/div[2]/div[1]/div/div/form/table/tbody/tr[8]/td/div/div[2]/select"
# id attribute of the Search button.
SEARCH_ID = "searchButton"
# Absolute XPath of the <tbody> that is waited for after the search is submitted.
TBODY_XPATH = "//*[@id='main-content']/div[2]/div[1]/div/div/table/tbody/tr[1]/td[2]/table/tbody"
# Visible text of the dropdown option to select.
FISCAL_YEAR = "2000"

options = ChromeOptions()
options.add_argument("--headless")

# Pass the options by keyword: the first positional parameter of
# webdriver.Chrome() is not `options` in every Selenium release, so a
# positional argument can be misinterpreted on older versions.
with webdriver.Chrome(options=options) as driver:
    driver.get(URL)
    wait = WebDriverWait(driver, 5)

    # Wait for the dropdown to exist before wrapping it in Select.
    year_select = Select(
        wait.until(EC.presence_of_element_located((By.XPATH, SELECT_XPATH)))
    )
    available_years = {option.text for option in year_select.options}

    if FISCAL_YEAR in available_years:
        year_select.select_by_visible_text(FISCAL_YEAR)
        wait.until(EC.element_to_be_clickable((By.ID, SEARCH_ID))).click()

        # The click reloads the page at the same URL; wait for the table body
        # to appear before reading from it.
        tbody = wait.until(EC.presence_of_element_located((By.XPATH, TBODY_XPATH)))

        # Only every second matching cell is printed — presumably the cells in
        # between hold a different column; verify against the page markup.
        for td in tbody.find_elements(By.CSS_SELECTOR, "td.valign-top")[::2]:
            print(td.text)
    else:
        print(f"{FISCAL_YEAR} is not an available option")

Output:

Allen Sinclair
BP Exploration-Alaska (BPXA)
Ben Shafsky
Doyon Drilling, Inc.
Michael Krupa