I'm trying to get the "field" links nested in the page tree on this page. I can retrieve the first-level links, but not any of the third-level links that only appear after clicking to expand the tree — I think because they are dynamically generated. I've also tried the XPath for the inner elements of the page tree, but got nothing back. Here's the code I'm using so far. I think I need to add some code that clicks and expands the tree. Do I need to use other libraries? I'd be very thankful for any help.
Code
from selenium import webdriver
from selenium.webdriver.common.by import By
# Start a headless Chrome session.
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)
url = 'https://ddwiki.reso.org/display/DDW17/'
try:
    driver.get(url)
    # Only the first matching page-tree list is taken; links below it are
    # returned only if they are already present in the DOM at this point.
    elem = driver.find_element(By.CLASS_NAME, "plugin_pagetree_children_list")
    links = elem.find_elements(By.TAG_NAME, "a")
    for lnk in links:
        print(lnk.get_attribute('href'))
finally:
    # Always shut the browser down, even when a lookup above fails.
    driver.quit()
Output (shortened)
https://ddwiki.reso.org/display/DDW17/Data+Dictionary+v1.7+Introduction?src=contextnavpagetreemode
https://ddwiki.reso.org/display/DDW17/Property+Resource?src=contextnavpagetreemode
https://ddwiki.reso.org/display/DDW17/Member+Resource?src=contextnavpagetreemode
https://ddwiki.reso.org/display/DDW17/Office+Resource?src=contextnavpagetreemode
https://ddwiki.reso.org/display/DDW17/Contacts+Resource?src=contextnavpagetreemode
HTML
<div class="plugin_pagetree_children" id="children393239-0">
<ul class="plugin_pagetree_children_list" id="child_ul393239-0">
<li>
<div class="plugin_pagetree_childtoggle_container">
<a id="plusminus393267-0" class="plugin_pagetree_childtoggle aui-icon aui-icon-small aui-iconfont-chevron-right" href="" data-type="toggle" data-page-id="393267" data-tree-id="0" aria-expanded="false" aria-label="Expand item Data Dictionary v1.7 Introduction">
</a>
</div>
<div class="plugin_pagetree_children_content">
<span class="plugin_pagetree_children_span" id="childrenspan393267-0"> <a href="/display/DDW17/Data+Dictionary+v1.7+Introduction?src=contextnavpagetreemode">Data Dictionary v1.7 Introduction</a>
</span>
</div>
<div id="children393267-0" class="plugin_pagetree_children_container">
</div>
</li>
<li>
<div class="plugin_pagetree_childtoggle_container">
<a id="plusminus393372-0" class="plugin_pagetree_childtoggle aui-icon aui-icon-small aui-iconfont-chevron-down" href="" data-type="toggle" data-page-id="393372" data-tree-id="0" aria-expanded="true" aria-label="Expand item Property Resource" data-children-loaded="true" data-expanded="true">
</a>
</div>
<div class="plugin_pagetree_children_content">
<span class="plugin_pagetree_children_span" id="childrenspan393372-0"> <a href="/display/DDW17/Property+Resource?src=contextnavpagetreemode">Property Resource</a>
</span>
</div>
<div id="children393372-0" class="plugin_pagetree_children_container">
<ul class="plugin_pagetree_children_list" id="child_ul393372-0">
<li>
<div class="plugin_pagetree_childtoggle_container">
<a id="plusminus393402-0" class="plugin_pagetree_childtoggle aui-icon aui-icon-small aui-iconfont-chevron-down" href="" data-type="toggle" data-page-id="393402" data-tree-id="0" aria-expanded="true" aria-label="Expand item Business Group" data-children-loaded="true" data-expanded="true">
</a>
</div>
<div class="plugin_pagetree_children_content">
<span class="plugin_pagetree_children_span" id="childrenspan393402-0"> <a href="/display/DDW17/Business+Group?src=contextnavpagetreemode">Business Group</a>
</span>
</div>
<div id="children393402-0" class="plugin_pagetree_children_container" style="display: block;">
<ul class="plugin_pagetree_children_list" id="child_ul393402-0">
<li>
<div class="plugin_pagetree_childtoggle_container">
<span class="no-children icon"></span>
</div>
<div class="plugin_pagetree_children_content">
<span class="plugin_pagetree_children_span" id="childrenspan393855-0"> <a href="/display/DDW17/BusinessName+Field?src=contextnavpagetreemode">BusinessName Field</a>
</span>
</div>
<div id="children393855-0" class="plugin_pagetree_children_container">
</div>
</li>
The page tree on your site is built lazily: inner-level toggles are rendered only after you click the parent-level toggle.
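To get the links behind every toggle, you need a loop that walks the toggles by index. On each iteration you click the toggle at the current index, wait until the newly rendered links appear, re-collect the list of toggles, and increment the index.
When the index goes out of bounds, it means that no new toggle appeared after opening the last one, so the whole tree has been expanded.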
Toggles can be located with the CSS selector `a[data-tree-id]`.
So, your code should be:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Configure Chrome to run headless (no visible browser window).
options = webdriver.ChromeOptions()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)
# Explicit wait bound to this driver, with a 20-second timeout.
wait = WebDriverWait(driver, 20)
def get_links_len():
    """Return the number of ``<a>`` elements currently found by the driver."""
    anchors = driver.find_elements(By.TAG_NAME, "a")
    return len(anchors)
def wait_for_values_not_equal(value_func, value, timeout=10, interval=0.5):
    """Block until ``value_func()`` returns something other than ``value``.

    Args:
        value_func: Zero-argument callable that is polled repeatedly.
        value: The value ``value_func`` is expected to move away from.
        timeout: Maximum number of seconds to keep polling.
        interval: Number of seconds to sleep between polls.

    Returns:
        ``True`` once a differing value has been observed.

    Raises:
        TimeoutError: If the value is still unchanged after ``timeout`` seconds.
    """
    # Use the monotonic clock so a system clock adjustment cannot shorten or
    # stretch the timeout.
    start_time = time.monotonic()
    while value_func() == value:
        if time.monotonic() - start_time > timeout:
            raise TimeoutError("Timeout: Value did not change within the specified time.")
        time.sleep(interval)
    # The loop only exits after a changed value was seen; polling again here
    # would be redundant and could race with the page.
    return True
try:
    driver.get("https://ddwiki.reso.org/display/DDW17/")
    # Wait for the first toggle to become visible before looking up the
    # page-tree container, so the lookup does not run against a tree that
    # has not been rendered yet.
    wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "a[data-tree-id]")))
    elem = driver.find_element(By.CLASS_NAME, "plugin_pagetree_children_list")
    toggles = elem.find_elements(By.CSS_SELECTOR, "a[data-tree-id]")
    iteration = 0
    # Walk the toggles by index; the list is re-read after every expansion,
    # so toggles revealed by a click are picked up on later iterations.
    while iteration < len(toggles):
        toggle = toggles[iteration]
        # Clicking an already expanded toggle would collapse it and the link
        # count would never change, so only click collapsed ones.
        if toggle.get_attribute("aria-expanded") != "true":
            temp_links_len = get_links_len()
            toggle.click()
            wait_for_values_not_equal(get_links_len, temp_links_len)
            toggles = elem.find_elements(By.CSS_SELECTOR, "a[data-tree-id]")
        iteration += 1
    for lnk in elem.find_elements(By.TAG_NAME, 'a'):
        href = lnk.get_attribute('href')
        if href:
            print(href)
finally:
    # Always shut the browser down, even when a wait above times out.
    driver.quit()