python web-scraping curl

Python investing.com historical stock data by curl request


How can I force the server to send me the queried data via curl in Python, directly from the historical-data page URL, as shown in the example? I have already tried different things, but I must be doing something wrong when passing the query parameters, and I can't figure out what. I am running curl for Windows on a 64-bit PC. Please help.

from subprocess import run, PIPE
from htmlement import HTMLement
from json import dumps


def get_data(tabel):
    """Print the header row and every body row of a parsed HTML table.

    Args:
        tabel: Root ``<table>`` element offering the ElementTree-style
            ``find`` / ``findall`` / child-iteration API used below.

    The header is printed as one list holding the text of each
    ``th > div > button > span`` node of the first ``<tr>`` in ``<thead>``.
    Every ``<tr>`` in ``<tbody>`` is then printed as a list: the first cell
    contributes the text of its ``<time>`` child, the remaining cells
    contribute their own ``text``.

    If an expected node is missing, ``'table ; data ; invalid'`` is printed
    instead; anything already printed before the failure stays printed.

    Returns:
        None. All results go to standard output.
    """
    try:
        header_row = tabel.find("thead").find("tr")
        header = [
            th.find('div').find('button').find('span').text
            for th in header_row
        ]
        print(header)

        for tr in tabel.find("tbody").findall("tr"):
            row = []
            for idx, td in enumerate(tr):
                # Only the first column wraps its value in a <time> element.
                row.append(td.find('time').text if idx == 0 else td.text)
            print(row)
    except (AttributeError, TypeError):
        # A missing node makes find() return None, so the next attribute
        # access or iteration fails. Catching only these keeps
        # KeyboardInterrupt / SystemExit and unrelated bugs visible.
        print('table ; data ; invalid')
    return


def get_document(val):
    """Download an investing.com historical-data page and print its price table.

    Args:
        val: Site-relative instrument path such as ``'/equities/aarons'``;
            ``'-historical-data'`` is appended to form the page URL.

    The page is fetched by running ``curl.exe`` as a subprocess with the date
    range sent as a JSON request body. The table with the expected CSS class
    is extracted from the response with ``HTMLement`` and handed to
    ``get_data`` for printing. If the table cannot be extracted,
    ``'html ; table ; invalid'`` is printed.

    NOTE(review): whether the server honours the posted date range is not
    visible from this code - confirm against the site's behaviour.

    Returns:
        None. All results go to standard output.
    """
    url = f'https://www.investing.com{val}-historical-data'
    print('url:', url)
    params = {"from_date": "07/01/2024", "to_date": "08/31/2024", "interval": "Daily"}
    # Each option and its value are separate argv entries. No shell splits the
    # list items, so a fused '-A Chrome/...' string would hand curl a value
    # that starts with a stray space.
    cnfg = [
        'curl.exe', '-s',
        '-A', 'Chrome/91.0.4472.114',
        '-H', 'Content-Type: application/json',
        '-d', dumps(params),
        url,
    ]
    print('config:', cnfg, chr(10))

    htm_doc = run(cnfg, stdout=PIPE).stdout.decode('utf-8')
    try:
        parser = HTMLement("table", attrs={"class": "freeze-column-w-1 w-full overflow-x-auto text-xs leading-4"})
        parser.feed(htm_doc)
        table = parser.close()
        get_data(table)
    except Exception:
        # Best-effort boundary around the third-party parser; unlike a bare
        # except this still lets KeyboardInterrupt / SystemExit through.
        print('html ; table ; invalid')
    return

if __name__ == '__main__':
    # Run the demo request only when executed as a script. The interpreter
    # terminates on its own afterwards, so the interactive-shell helper
    # exit() is not called here.
    get_document('/equities/aarons')
    

The above Python script returns the default page output instead of the data corresponding to the query parameters:

url: https://www.investing.com/equities/aarons-historical-data
config: ['curl.exe', '-s', '-A Chrome/91.0.4472.114', '-H Content-Type: application/json', "-d {'st_date': '07/01/2024', 'end_date': '08/31/2024', 'interval_sec': 'Daily', 'action': 'historical_data'}", 'https://www.investing.com/equities/aarons-historical-data']

['Date', 'Price', 'Open', 'High', 'Low', 'Vol.', 'Change %']
['Sep 03, 2024', '10.00', '9.97', '10.08', '9.83', '2.96M', '-0.89%']
['Aug 30, 2024', '10.09', '10.09', '10.10', '10.09', '257.67K', '0.00%']
['Aug 29, 2024', '10.09', '10.10', '10.10', '10.09', '237.03K', '0.00%']
['Aug 28, 2024', '10.09', '10.09', '10.11', '10.08', '729.33K', '+0.20%']
['Aug 27, 2024', '10.07', '10.08', '10.09', '10.07', '313.87K', '-0.10%']
['Aug 26, 2024', '10.08', '10.09', '10.10', '10.08', '349.82K', '+0.10%']
['Aug 23, 2024', '10.07', '10.09', '10.11', '10.07', '879.99K', '0.00%']
['Aug 22, 2024', '10.07', '10.08', '10.10', '10.07', '599.27K', '-0.20%']
['Aug 21, 2024', '10.09', '10.09', '10.10', '10.08', '376.91K', '+0.10%']
['Aug 20, 2024', '10.08', '10.09', '10.09', '10.08', '484.85K', '0.00%']
['Aug 19, 2024', '10.08', '10.08', '10.10', '10.07', '1.04M', '+0.10%']
['Aug 16, 2024', '10.07', '10.08', '10.09', '10.07', '827.66K', '-0.10%']
['Aug 15, 2024', '10.08', '10.08', '10.09', '10.06', '776.58K', '+0.20%']
['Aug 14, 2024', '10.06', '10.08', '10.10', '10.06', '655.35K', '0.00%']
['Aug 13, 2024', '10.06', '10.04', '10.10', '10.04', '1.39M', '+0.50%']
['Aug 12, 2024', '10.01', '10.03', '10.04', '10.01', '463.42K', '-0.10%']
['Aug 09, 2024', '10.02', '10.01', '10.02', '10.00', '738.52K', '0.00%']
['Aug 08, 2024', '10.02', '10.04', '10.04', '10.00', '214.86K', '+0.20%']
['Aug 07, 2024', '10.00', '10.00', '10.06', '9.99', '590.25K', '-0.40%']
['Aug 06, 2024', '10.04', '9.96', '10.09', '9.95', '747.36K', '+0.70%']
['Aug 05, 2024', '9.97', '9.96', '10.02', '9.95', '1.23M', '-0.60%']

Process finished with exit code 0

Solution

  • You can get the data as JSON from the API:

    import subprocess
    import json
    from urllib.parse import urlencode
    
    def get_data(stock_id, *, start_date='2024-07-01', end_date='2024-08-31', time_frame='Daily'):
        """Fetch historical data for one instrument from the JSON API via curl.

        Args:
            stock_id: Identifier inserted as the last path segment of the
                API URL.
            start_date: Value sent as the ``start-date`` query parameter.
            end_date: Value sent as the ``end-date`` query parameter.
            time_frame: Value sent as the ``time-frame`` query parameter.

        Returns:
            The response body decoded with ``json.loads``.

        Raises:
            subprocess.CalledProcessError: If curl exits with a non-zero status.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        url = f'https://api.investing.com/api/financialdata/historical/{stock_id}'
        params = {
            'start-date': start_date,
            'end-date': end_date,
            'time-frame': time_frame,
            'add-missing-rows': 'false',
        }

        # -G makes curl append the -d payload to the URL as a query string
        # and issue a GET instead of a POST.
        cnfg = ['curl', '-A', 'Chrome/128.0.0.0', '-H', 'domain-id: www', '-G', url, '-d', urlencode(params)]

        # check=True reports a failed transfer through curl's exit status
        # instead of passing an empty body on to json.loads.
        output = subprocess.run(cnfg, capture_output=True, check=True).stdout.decode()
        return json.loads(output)
    
    
    # Fetch the data for instrument id '39136' and print the parsed JSON.
    data = get_data('39136')
    print(data)
    

    Or you can use curl_cffi, which supports HTTP/2:

    pip install curl_cffi --upgrade
    

    It works just like requests:

    from curl_cffi import requests
    
    # Request headers sent with the API call.
    headers = {
        'domain-id': 'www',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
    }

    # Query-string parameters selecting the date range and sampling interval.
    params = {
        'start-date': '2024-07-01',
        'end-date': '2024-08-31',
        'time-frame': 'Daily',
        'add-missing-rows': 'false',
    }

    url = 'https://api.investing.com/api/financialdata/historical/39136'
    response = requests.get(url, params=params, headers=headers)
    # Fail with a clear HTTP error instead of a JSON decode error when the
    # server answers with an error status.
    response.raise_for_status()

    data = response.json()
    print(data)