I want to get the data shown in the HTML table on this website. Python requests and BeautifulSoup are the main packages used. When I use the POST method with the parameters, I get the HTML page with an empty table. I'm not able to understand what is going on.
def casestatus_name(csrf, party):
    """Submit a party-name case-status search and return the HTTP response.

    A first GET of the site root collects the cookies (including the CSRF
    cookie); the search is then POSTed as form fields together with those
    cookies.

    Args:
        csrf: Not used by this function; the CSRF token is read from the
            ``csrf_cookie_name`` cookie instead. Kept so existing callers
            keep working.
        party: Party name to search for (sent as ``token_no``).

    Returns:
        The response object of the POST to ``/casestatus``. Its body is
        also printed to stdout.

    Raises:
        KeyError: If the first response did not set ``csrf_cookie_name``.

    Exits the process with status 1 if the initial GET fails.
    """
    try:
        test = url_get('https://cestat.gov.in')
        # Check the status before trusting anything else on the response.
        test.raise_for_status()
        cookies = test.cookies.get_dict()
    except Exception as e:
        # NOTE(review): url_get is defined elsewhere, so the exception types
        # it can raise are not visible here; the broad catch is kept.
        print(e)
        sys.exit(1)
    payload = {
        'csrf_token': cookies['csrf_cookie_name'],
        'schema_type': 'bangalore',
        'app_type': 'pno',
        'token_no': party,
        'captcha_code': '111111',
        'button3': 'SEARCH',
    }
    # data= sends the payload form-encoded, like a submitted HTML form
    # (json= would send a JSON body instead). The cookies from the first
    # request are passed along so the CSRF token in the body has its
    # matching cookie.
    response = requests.post(
        "https://cestat.gov.in/casestatus",
        data=payload,
        cookies=cookies,
    )
    print(response.text)
    return response
Here's a working example using a session:
import requests
from bs4 import BeautifulSoup
url = "https://cestat.gov.in/casestatus"
party = "access enterprises"

with requests.Session() as s:
    # One first GET to retrieve the CSRF token and other cookies; the
    # session stores them and sends them back on the following requests.
    s.get(url).raise_for_status()
    payload = {
        'csrf_token': s.cookies['csrf_cookie_name'],
        'schema_type': 'bangalore',
        'app_type': 'pno',
        'token_no': party,
        'captcha_code': '111111',
        'button3': 'SEARCH',
    }
    # data= submits the payload as form fields.
    response = s.post(url, data=payload)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # select_one returns None when nothing matches, so check before
    # indexing instead of failing with an opaque TypeError.
    anchor = soup.select_one('table#example a')
    if anchor is None:
        raise SystemExit(f"No case link found in the result table for {party!r}")
    link = anchor['href']
    print(link)
    # if you want to scrape this link, just stay within the session
    # response2 = s.get(link)
Output:
https://cestat.gov.in/casedetailreport/2952510201062016/bangalore