I try to scrape games and odds from this url Merkurbets with selenium and python.
The element for "team-name" looks like. Is _ngcontent-ng-c1043474636 in div-tag a problem for selenium to find the text() of div-tag?
<div _ngcontent-ng-c1043474636="" class="team-name">FAC Wien</div>
I've tried a lot of combination to get data out of this url, here are just one example:
# games_elements = driver.find_elements(By.XPATH, '//*[@id="cdk-accordion-child-3"]/div/div/games-list-partial/div')
games_elements = driver.find_elements(By.XPATH,'//*[@id="cdk-accordion-child-3"]/div/div/games-list-partial/div/games-list-game[1]')
print('games_elements: ', games_elements)
for game in games_elements:
try:
# Mannschaftsnamen extrahieren
teams = game.find_elements(By.XPATH, './/*[@id="cdk-accordion-child-3"]/div/div/games-list-partial/div/games-list-game[1]/div/div[1]/div/game-details-summary/div/div/div/div[1]')
# Quoten extrahieren
odds = game.find_elements(By.XPATH, './/*[@id="cdk-accordion-child-3"]/div/div/games-list-partial/div/games-list-game[1]/div/div[2]/div[1]/market/div/div[2]/div/tip[1]/div/button/span[2]/div/tip-odds/div/div')
if len(teams) == 2 and len(odds) >= 3:
home_team = teams[0].text
away_team = teams[1].text
home_odds = odds[0].text
draw_odds = odds[1].text
away_odds = odds[2].text
games_data.append({
"Home Team": home_team,
"Away Team": away_team,
"Home Odds": home_odds,
"Draw Odds": draw_odds,
"Away Odds": away_odds,
"Zeitstempel": timestamp # Zeitstempel
})
else:
logging.warning("Unerwartete Anzahl von Teams oder Quoten für ein Spiel gefunden.")
except NoSuchElementException as e:
logging.error(f"Fehler beim Extrahieren der Daten für ein Spiel: {e}")
or here
# Alternativ: Versuche eine längere Wartezeit mit Sichtbarkeitsprüfung
wait = WebDriverWait(driver, 30)
try:
# Greife auf ein sichtbares 'team-name'-Element zu
team_name_element = wait.until(
EC.visibility_of_element_located((By.CLASS_NAME, "team-name"))
)
print(team_name_element.text)
except Exception as e:
print(f"Fehler: {e}")
finally:
# Speichere die Seite, um den Inhalt zu überprüfen
page_source = driver.page_source
with open('page_source.html', 'w', encoding='utf-8') as f:
f.write(page_source)
print("HTML-Inhalt gespeichert.")
and always chromedrive accept cookies and scroll down
def scroll(driver):
height = driver.execute_script("return document.body.scrollHeight")
print('height: ', height)
while True:
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(1) # <- traditional hack
hnow = driver.execute_script("return document.body.scrollHeight")
print('hnow: ', hnow)
if hnow == height:
break
height = hnow
# Starte den Webdriver
chrome_options = Options()
# chrome_options.add_argument("--headless") # Optional: Ohne Fenster anzeigen
service = Service(executable_path="/usr/bin/chromedriver") # Pfad zu chromedriver
driver = webdriver.Chrome(service=service, options=chrome_options)
# Öffne die Webseite
driver.get("https://www.merkurbets.de/de/sports/0/s1/1")
driver.maximize_window()
time.sleep(5)
# Cookies akzeptieren
accept_button = driver.find_element(By.XPATH, '//*[@id="cookiescript_accept"]')
accept_button.click()
time.sleep(2)
scroll(driver)
# Warte 10 Sekunden (probiere dies nur zu Debugging-Zwecken)
time.sleep(2)
team = driver.find_elements(By.XPATH, './/div[contains(@class,"team-name")]')
I've try a lot of variation with XPATH, CSS_SELECTOR, CLASS_NAME etc. but everytime I get empty list, no elements. Please help.
Or you could send the api request directly.