How can I force the server to send me the queried data when calling curl
from Python directly against the url_historical_data
page, as shown in the example? I have already tried different things, but I must be doing something wrong when passing the query parameters, and I can't figure it out. I am running Windows curl
on a 64-bit PC. Please help.
from subprocess import run, PIPE
from htmlement import HTMLement
from json import dumps
def get_data(tabel):
    """Print the header row and every body row of a parsed HTML table.

    Args:
        tabel: Root element of the table. It must offer ``find``/``findall``
            and iteration over child elements (ElementTree-style API).

    The column titles are printed first as one list, then one list per
    ``<tr>`` found in ``<tbody>``. When the expected structure is missing,
    the line ``'table ; data ; invalid'`` is printed instead of raising.

    Returns:
        None.
    """
    try:
        header_row = tabel.find("thead").find("tr")
        # Each header cell nests its title as div > button > span.
        header = [
            th.find('div').find('button').find('span').text
            for th in header_row
        ]
        print(header)
        body = tabel.find("tbody")
        for tr in body.findall("tr"):
            row = []
            for index, td in enumerate(tr):
                if index == 0:
                    # The first cell wraps the date in a <time> element.
                    row = [td.find('time').text]
                else:
                    row.append(td.text)
            print(row)
    except (AttributeError, TypeError):
        # With ElementTree-style elements a failed ``find`` yields None, so a
        # missing node surfaces as AttributeError (attribute access on None)
        # or TypeError (iterating None). Only those are treated as "invalid
        # table"; KeyboardInterrupt and friends are no longer swallowed.
        print('table ; data ; invalid')
    return
def get_document(val):
    """Download an investing.com historical-data page with curl and print its table.

    Args:
        val: Site-relative instrument path such as ``'/equities/aarons'``;
            ``'-historical-data'`` is appended to form the page URL.

    The page is fetched by running ``curl.exe`` as a subprocess, the price
    table is extracted with ``HTMLement`` and handed to ``get_data`` for
    printing. If the table cannot be extracted, ``'html ; table ; invalid'``
    is printed instead of raising.

    Returns:
        None.
    """
    url = f'https://www.investing.com{val}-historical-data'
    print('url:', url)
    params = {"from_date": "07/01/2024", "to_date": "08/31/2024", "interval": "Daily"}
    # Every curl flag and its value must be a separate argv item. Fusing them
    # ('-A ' + value, '-H Content-Type: ...') hands curl a value that starts
    # with a stray space.
    cnfg = [
        'curl.exe', '-s',
        '-A', 'Chrome/91.0.4472.114',
        '-H', 'Content-Type: application/json',
        '-d', dumps(params),
        url,
    ]
    print('config:', cnfg, chr(10))
    htm_doc = run(cnfg, stdout=PIPE).stdout.decode('utf-8')
    # NOTE(review): the HTML page was observed to return its default date
    # range regardless of the posted parameters; confirm whether a JSON API
    # endpoint should be queried for date-filtered data instead.
    try:
        parser = HTMLement("table", attrs={"class": "freeze-column-w-1 w-full overflow-x-auto text-xs leading-4"})
        parser.feed(htm_doc)
        table = parser.close()
        get_data(table)
    except Exception:
        # Best-effort boundary: any parsing failure is reported, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print('html ; table ; invalid')
    return
if __name__ == '__main__':
    # Script entry point: fetch and print the table for one instrument.
    get_document('/equities/aarons')
    # ``exit()`` is a helper the ``site`` module injects for interactive
    # sessions and is missing under ``python -S``; raising SystemExit ends
    # the script with status 0 without relying on it.
    raise SystemExit(0)
The above Python script returns the default page output instead of the data corresponding to the query parameters:
url: https://www.investing.com/equities/aarons-historical-data
config: ['curl.exe', '-s', '-A Chrome/91.0.4472.114', '-H Content-Type: application/json', "-d {'st_date': '07/01/2024', 'end_date': '08/31/2024', 'interval_sec': 'Daily', 'action': 'historical_data'}", 'https://www.investing.com/equities/aarons-historical-data']
['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
['Sep 03, 2024', '10.00', '9.97', '10.08', '9.83', '2.96M', '-0.89%']
['Aug 30, 2024', '10.09', '10.09', '10.10', '10.09', '257.67K', '0.00%']
['Aug 29, 2024', '10.09', '10.10', '10.10', '10.09', '237.03K', '0.00%']
['Aug 28, 2024', '10.09', '10.09', '10.11', '10.08', '729.33K', '+0.20%']
['Aug 27, 2024', '10.07', '10.08', '10.09', '10.07', '313.87K', '-0.10%']
['Aug 26, 2024', '10.08', '10.09', '10.10', '10.08', '349.82K', '+0.10%']
['Aug 23, 2024', '10.07', '10.09', '10.11', '10.07', '879.99K', '0.00%']
['Aug 22, 2024', '10.07', '10.08', '10.10', '10.07', '599.27K', '-0.20%']
['Aug 21, 2024', '10.09', '10.09', '10.10', '10.08', '376.91K', '+0.10%']
['Aug 20, 2024', '10.08', '10.09', '10.09', '10.08', '484.85K', '0.00%']
['Aug 19, 2024', '10.08', '10.08', '10.10', '10.07', '1.04M', '+0.10%']
['Aug 16, 2024', '10.07', '10.08', '10.09', '10.07', '827.66K', '-0.10%']
['Aug 15, 2024', '10.08', '10.08', '10.09', '10.06', '776.58K', '+0.20%']
['Aug 14, 2024', '10.06', '10.08', '10.10', '10.06', '655.35K', '0.00%']
['Aug 13, 2024', '10.06', '10.04', '10.10', '10.04', '1.39M', '+0.50%']
['Aug 12, 2024', '10.01', '10.03', '10.04', '10.01', '463.42K', '-0.10%']
['Aug 09, 2024', '10.02', '10.01', '10.02', '10.00', '738.52K', '0.00%']
['Aug 08, 2024', '10.02', '10.04', '10.04', '10.00', '214.86K', '+0.20%']
['Aug 07, 2024', '10.00', '10.00', '10.06', '9.99', '590.25K', '-0.40%']
['Aug 06, 2024', '10.04', '9.96', '10.09', '9.95', '747.36K', '+0.70%']
['Aug 05, 2024', '9.97', '9.96', '10.02', '9.95', '1.23M', '-0.60%']
Process finished with exit code 0
You can get the data as JSON from the API:
import subprocess
import json
from urllib.parse import urlencode
def get_data(stock_id, start_date='2024-07-01', end_date='2024-08-31', time_frame='Daily'):
    """Fetch historical data for one instrument from the investing.com API via curl.

    Args:
        stock_id: Instrument identifier placed in the API URL path.
        start_date: Value sent as the ``start-date`` query parameter.
        end_date: Value sent as the ``end-date`` query parameter.
        time_frame: Value sent as the ``time-frame`` query parameter.

    Returns:
        The decoded JSON response body.

    Raises:
        json.JSONDecodeError: If curl's output is not valid JSON (for
            example when the request failed and nothing was printed).
    """
    url = f'https://api.investing.com/api/financialdata/historical/{stock_id}'
    params = {
        'start-date': start_date,
        'end-date': end_date,
        'time-frame': time_frame,
        'add-missing-rows': 'false',
    }
    # -G makes curl append the -d payload to the URL as a query string and
    # issue a GET instead of a POST.
    cnfg = [
        'curl',
        '-A', 'Chrome/128.0.0.0',
        '-H', 'domain-id: www',
        '-G', url,
        '-d', urlencode(params),
    ]
    output = subprocess.run(cnfg, capture_output=True).stdout.decode()
    return json.loads(output)
# Query the API for instrument id 39136 and show the decoded JSON.
data = get_data(stock_id='39136')
print(data)
Or you can use curl_cffi, which supports HTTP/2:
pip install curl_cffi --upgrade
It works just like requests:
from curl_cffi import requests
# Request headers sent with the API call.
headers = {
    'domain-id': 'www',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
}
# Query-string parameters selecting the date range and sampling interval.
params = {
    'start-date': '2024-07-01',
    'end-date': '2024-08-31',
    'time-frame': 'Daily',
    'add-missing-rows': 'false',
}
url = 'https://api.investing.com/api/financialdata/historical/39136'
response = requests.get(url, params=params, headers=headers)
# Fail loudly on an HTTP error status instead of letting a rejected request
# surface later as a confusing JSON decoding error.
response.raise_for_status()
data = response.json()
print(data)