Tags: python, selenium, date, noaa

Using Python and Selenium to select a date range from a pop-up menu


I'm trying to automate a data pull from the NOAA website (I was using requests, but there seems to be a bug, so I'm trying to use Selenium to automate the pulling of reports).

from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Launch Chrome and load the NOAA search page.
driver = webdriver.Chrome()
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')

# Select type of data: wrap the #selectedDataset element in Select and pick
# the option whose visible text is 'Daily Summaries'.
t = Select(driver.find_element(By.CSS_SELECTOR, '#selectedDataset'))
t.select_by_visible_text('Daily Summaries')

The basic drop-down (data type, code above) and input field (search term) are fairly straightforward, no problems. Where I'm struggling is with the date range picker. I can get it to open into the calendar with this:

# Click the date-range container; per the description above, this opens the calendar pop-up.
driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer').click()

I can't for the life of me get it to open the year or month drop-downs. I've tried Select, ActionChains, and a number of other things, such as:

# Attempt 1: locate the start-date container through its full CSS path
# (lookup only; nothing is clicked or selected).
driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer > div > div > div.noaa-datepicker-start-container.center.clearfix')

# Attempt 2: wrap an element addressed through a "dp<digits>" id in Select.
# NOTE(review): the selector ends in "> div", and Select only accepts <select>
# elements. The numeric id also differs from the one used in attempt 4, so it
# is presumably regenerated on each page load - confirm.
Select(driver.find_element(By.CSS_SELECTOR, '#dp1662812794185 > div > div'))

# Attempt 3: wait up to 5 seconds for the year drop-down to become clickable.
# NOTE(review): "#select.ui-datepicker-year" matches an element with id="select";
# attempt 4 addresses the same control as a <select> tag, so
# "select.ui-datepicker-year" (no "#") was probably intended - confirm.
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#select.ui-datepicker-year")))

# Attempt 4: find the year <select> through a "dp<digits>" id, then hover over it,
# click it and type '2020' with ActionChains.
start_field = driver.find_element(By.CSS_SELECTOR, '#dp1662493859959 > div > div > div > select.ui-datepicker-year')
ActionChains(driver).move_to_element(start_field).click().send_keys('2020').perform()

I've also Googled extensively and haven't found anything that seems to work. Generally what happens is that it gives me a NoSuchElementException. In desperation, I've even gone line by line through the inspect pane and copied the selector of each element to see if any would work (none did). The snippets above are some of the things I've tried.

Is the problem that Selenium is still looking for the selector on the background page and hasn't switched to the pop-up? It doesn't appear to be an iFrame (I think), but if that's the case, how do I direct Selenium to look at the pop-up instead?

REVISED CODE (Thanks to Barry's solution below):

from selenium import webdriver
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait

# Chrome launch options: sandbox disabled, notifications disabled, fixed 1280x720 window.
chrome_options = Options()
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument('window-size=1280,720')
# Path to the local chromedriver executable; adjust for your machine.
webdriver_service = Service('C:/Program Files (x86)/Google/chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# NOTE: `actions` is created here but not used anywhere in the code shown.
actions = ActionChains(driver)
# Explicit wait shared by the rest of the script (10-second timeout).
wait = WebDriverWait(driver, 10)
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')

# Search parameters: dates are 'YYYY-MM-DD' strings (select_date slices them by
# position); `station` is typed into the search field at the end of the script.
start_date = '2020-01-01'
end_date = '2020-12-31'
station = 'USW00014739'

# Select type of data
t = Select(driver.find_element(By.CSS_SELECTOR, '#selectedDataset'))
t.select_by_visible_text('Daily Summaries')

# Select date range
# NOTE(review): the dataset drop-down is selected a second time here, by position
# (index 3), after it was already chosen by visible text above. Whether index 3 is
# 'Daily Summaries' cannot be told from this code - confirm, or drop one of the two.
dataset = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))))
dataset.select_by_index(3)
# Wait for the date-range input to become clickable, then click it so the
# calendars used by select_date are on screen.
daterange = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]')))
daterange.click() 

# Maps the two-digit month of a 'YYYY-MM-DD' string to the abbreviated month label
# that select_date passes to select_by_visible_text.
months = {'01': 'Jan', '02': 'Feb', '03': 'Mar', '04': 'Apr', '05': 'May', '06': 'Jun', '07': 'Jul', '08': 'Aug', '09': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'}

def select_date(calendar, date):
    """Pick ``date`` in the start or end calendar of the open date-range pop-up.

    Relies on the module-level ``wait`` (WebDriverWait) and ``months``
    (two-digit month -> abbreviated label) defined earlier in the script.

    Args:
        calendar: ``'start'`` targets the ``.noaa-datepicker-start-container``
            calendar; any other value targets ``.noaa-datepicker-end-container``.
        date: Date string in ``YYYY-MM-DD`` form, e.g. ``'2020-01-01'``.

    Raises:
        ValueError: If the month part of ``date`` is not a key of ``months``
            or the day part is not numeric. Raised before the browser is
            touched, instead of passing ``None`` on to Selenium.
    """
    container = '.noaa-datepicker-start-container' if calendar == 'start' else '.noaa-datepicker-end-container'

    year_text, month_key, day_part = date[:4], date[5:7], date[8:]
    month_text = months.get(month_key)
    if month_text is None or not day_part.isdigit():
        raise ValueError(f'expected a YYYY-MM-DD date, got {date!r}')
    # The day link is matched by its text without a leading zero ('01' -> '1').
    day_text = str(int(day_part))

    parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, container)))
    year = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
    year.select_by_visible_text(year_text)
    # The month drop-down is looked up only after the year has been chosen.
    month = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
    month.select_by_visible_text(month_text)
    # The leading "." keeps the XPath relative to `parent`. A bare "//a" is
    # evaluated against the whole document even when called on an element,
    # so it could click a day link that belongs to the other calendar.
    day = parent.find_element(By.XPATH, f'.//a[text() = "{day_text}"]')
    day.click()

# Fill in both calendars; the branch only decides which one is done first.
# NOTE(review): the comparison uses just the day-of-month parts (characters 8
# onward), not the full dates. Why the end date must be picked first when its
# day number is larger is not visible in this code - confirm with the author.
if int(end_date[8:]) > int(start_date[8:]):
    select_date('end', end_date)
    select_date('start', start_date)
else:
    select_date('start', start_date)
    select_date('end', end_date)

try:
    # Apply the chosen date range, type the station ID into the search field,
    # and submit the search.
    driver.find_element(By.CSS_SELECTOR, '#noaa-daterange-form > button.noaa-daterange-btn.noaa-daterange-applybtn').click()
    driver.find_element(By.CSS_SELECTOR, '#selectedSearchString').send_keys(station)
    driver.find_element(By.CSS_SELECTOR, '#searchSubmit').click()
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() only closes the current window and can leave the
    # chromedriver process running. The finally block makes sure this cleanup
    # also happens when one of the lookups above raises.
    driver.quit()

Solution

  • This is one way of selecting the date on that page (I'm selecting just the start date, you can mirror the code for the end date as well):

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.support.ui import Select
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.common.keys import Keys
    from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
    
    # Chrome launch options: sandbox disabled, notifications disabled, fixed window size.
    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument('disable-notifications')

    chrome_options.add_argument("window-size=1280,720")

    webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
    browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
    # NOTE: `actions` is created but not used in this snippet.
    actions = ActionChains(browser)
    # Explicit wait reused for every lookup below (20-second timeout).
    wait = WebDriverWait(browser, 20)

    url = 'https://www.ncdc.noaa.gov/cdo-web/search'

    browser.get(url)

    # Choose a dataset by position, then click the date-range input so the
    # calendars are on screen.
    dataset = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))))
    dataset.select_by_index(3)
    daterange = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]')))
    daterange.click()

    # Every lookup for the start date is made from the start-calendar container.
    start_date_parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".noaa-datepicker-start-container")))
    start_year = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
    start_year.select_by_visible_text('2009')
    print('selected 2009')
    start_month = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
    start_month.select_by_visible_text('Jul')
    print('selected July')
    # The leading "." keeps the XPath relative to the start container. A bare
    # "//a" is evaluated against the whole document even when called on an
    # element, so it could match a day link outside this calendar.
    start_day = start_date_parent.find_element(By.XPATH, './/a[text() = "13"]')
    start_day.click()
    print('selected the 13th')
    

    This will select 2009-07-13 as starting date, and also print in terminal:

    selected 2009
    selected July
    selected the 13th
    

    You should now be able to write the code for end date as well, select/input the info in 'Search for'/'Search term' and click Apply.

    Selenium documentation: https://www.selenium.dev/documentation/