I'm using BeautifulSoup, and scraping the website itself works. However, the page has three list buttons and four other buttons. Whenever I click one of these buttons the page content changes, but I can't scrape what changed, only the original page. What I'm trying to do is scrape the page for every combination of these three list buttons. More specifically, I want to get the values of the table after each click.
Command below:
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#pip install selenium
#dbutils.library.restartPython()
# Download the page with a plain HTTP GET. This yields only the HTML the
# server sends for this URL; nothing here executes the page's JavaScript.
html = requests.get("xxxx").content
soup = BeautifulSoup(html, 'html.parser')
print(soup.prettify())

# Look up the first <table> carrying the "ajax-overlay" class (None if absent).
preco = soup.find("table", class_="ajax-overlay")
print(preco)

# Collect every <fieldset> element; find_all is the supported name for the
# deprecated findAll alias.
buttons = soup.find_all('fieldset')
print(buttons)
I tried using BeautifulSoup with the code below:
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
#pip install selenium
#dbutils.library.restartPython()
# Download the page with a plain HTTP GET. This yields only the HTML the
# server sends for this URL; nothing here executes the page's JavaScript.
html = requests.get("xxxxx").content
soup = BeautifulSoup(html, 'html.parser')
print(soup.prettify())

# Look up the first <table> carrying the "ajax-overlay" class (None if absent).
preco = soup.find("table", class_="ajax-overlay")
print(preco)

# Collect every <fieldset> element; find_all is the supported name for the
# deprecated findAll alias.
buttons = soup.find_all('fieldset')
print(buttons)
The site retrieves its information dynamically, so you can first collect every available button value:
def get_options() -> dict:
    """Collect the selectable values of the three price-table drop-downs.

    Downloads the price-table page and reads the ``value`` attribute of every
    ``<option>`` inside the ``<select>`` elements whose ids are ``estado``,
    ``compulsorio`` and ``numero_de_vidas_plano``. Options with a missing or
    empty ``value`` are skipped.

    Returns:
        A dict with the keys ``'Estado'``, ``'Compulsorio'`` and
        ``'Numero_de_vidas_plano'``, each mapped to a list of option values.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If one of the expected ``<select>`` elements is missing.
    """
    url = 'https://kitcorretoramil.com.br/linha-selecionada-pme/tabela-de-precos-pme/'
    # A timeout keeps the call from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Result key -> id of the <select> element that holds its options.
    select_ids = {
        'Estado': 'estado',
        'Compulsorio': 'compulsorio',
        'Numero_de_vidas_plano': 'numero_de_vidas_plano',
    }

    options = {}
    for key, select_id in select_ids.items():
        select = soup.find('select', {'id': select_id})
        if select is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"<select id={select_id!r}> not found at {url}")
        options[key] = [
            option.get('value')
            for option in select.find_all('option')
            if option.get('value')
        ]
    return options
OUTPUT:
{'Estado': ['BAHIA', 'CEARÁ', 'DISTRITO FEDERAL', 'GOIÁS', 'CAMPINAS', 'MARANHÃO', 'MINAS GERAIS', 'PARAÍBA', 'PARANÁ', 'PERNAMBUCO', 'RIO DE JANEIRO', 'RIO GRANDE DO NORTE', 'RIO GRANDE DO SUL', 'SANTA CATARINA', 'SÃO PAULO'], 'Compulsorio': ['Compulsório', 'Livre Adesão'], 'Numero_de_vidas_plano': ['2', '3 a 4', '5 a 29', '30 a 99']}
Now you need to get the table; just change the estado, compulsorio and numero_de_vidas_plano values (this snippet also needs `import json` and `import pandas as pd`):
def get_table(estado: str, compulsorio: str, numero_de_vidas_plano: str) -> pd.DataFrame:
    """Fetch the price table for one combination of drop-down values.

    Sends a JSON POST to the site's ``ktc_get_price_table_values`` AJAX
    endpoint and loads the JSON response into a DataFrame.

    Args:
        estado: Value sent as ``Estado`` (e.g. ``'BAHIA'``).
        compulsorio: Value sent as ``Compulsorio`` (e.g. ``'Compulsório'``).
        numero_de_vidas_plano: Value sent as ``Numero_de_vidas_plano``
            (e.g. ``'2'``).

    Returns:
        A DataFrame built from the response JSON, without its
        ``legal_text`` entry.

    Raises:
        requests.HTTPError: If the endpoint returns an error status.
    """
    url = "https://kitcorretoramil.com.br/wp-admin/admin-ajax.php?action=ktc_get_price_table_values"
    # "Linha" and "Coparticipação" are fixed here; only the three arguments vary.
    payload = json.dumps({
        "pf": "false",
        "Estado": estado,
        "Compulsorio": compulsorio,
        "Numero_de_vidas_plano": numero_de_vidas_plano,
        "Linha": "Linha Selecionada",
        "Coparticipação": "Com coparticipação30"
    })
    headers = {
        'accept': '*/*',
        'content-type': 'application/json',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
    }
    # A timeout keeps the call from hanging forever on an unresponsive server.
    response = requests.request("POST", url, headers=headers, data=payload, timeout=30)
    response.raise_for_status()
    json_data = response.json()
    # Drop 'legal_text' before building the frame (presumably free text that is
    # not part of the table - confirm against a real response). pop() with a
    # default tolerates a response that lacks the key.
    json_data.pop('legal_text', None)
    return pd.DataFrame(json_data)
Then try it with some values:
# Fetch the table for one sample combination and print it in full
# (to_string renders every row and column without truncation).
df = get_table(
    estado='BAHIA',
    compulsorio='Compulsório',
    numero_de_vidas_plano='2',
)
print(df.to_string())