Tags: python, python-requests, yahoo-finance

Consume data from Yahoo Screener via requests


I ran a query on the Yahoo Screener at:

https://finance.yahoo.com/screener/equity/new

DevTools shows that the data came back as JSON via:

https://query2.finance.yahoo.com/v1/finance/screener?crumb=u0eNvTHfT6U&lang=en-US&region=US&formatted=true&corsDomain=finance.yahoo.com

So I tried to manually request the data with:

import json
import requests
# Endpoint taken from the DevTools capture. Note that this URL carries no
# query string; the crumb only appears inside the "path" header value below.
url = "https://query2.finance.yahoo.com/v1/finance/screener"

# Screener filter: region == "us".
region_filter = {
    "operator": "or",
    "operands": [{"operator": "EQ", "operands": ["region", "us"]}],
}

# Screener filter: intraday market cap below 2e9, or between 2e9 and 1e10.
market_cap_filter = {
    "operator": "or",
    "operands": [
        {"operator": "LT", "operands": ["intradaymarketcap", 2000000000]},
        {
            "operator": "BTWN",
            "operands": ["intradaymarketcap", 2000000000, 10000000000],
        },
    ],
}

# Request body; key order is kept so the serialized JSON is unchanged.
payload = {
    "size": 25,
    "offset": 0,
    "sortField": "intradaymarketcap",
    "sortType": "DESC",
    "quoteType": "EQUITY",
    "topOperator": "AND",
    "query": {
        "operator": "AND",
        "operands": [region_filter, market_cap_filter],
    },
    "userId": "",
    "userIdType": "guid",
}

# Headers copied verbatim from the browser request.
header = {
    # Entries shown by DevTools as request metadata; here they are sent as
    # ordinary header fields.
    "authority": "query2.finance.yahoo.com",
    "method": "POST",
    "path": "/v1/finance/screener?crumb=umZV3T8[ETC...]&lang=en-US&region=US&formatted=true&corsDomain=finance.yahoo.com",
    "scheme": "https",
    # Content negotiation.
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9",
    "Access-Control-Request-Headers": "content-type",
    "Access-Control-Request-Method": "POST",
    "Cache-Control": "no-cache",
    "Content-Type": "application/json",
    # Session cookie copied from the browser (truncated in the post).
    "Cookie": "tbla_id=33c52a3f-2fd9-41[ETC...]",
    "Origin": "https://finance.yahoo.com",
    "Pragma": "no-cache",
    "Referer": "https://finance.yahoo.com/screener/equity/new",
    # Browser fingerprint headers.
    "Sec-Ch-Ua": '"Chromium";v="116","Google Chrome";v="116"',
    "Sec-Ch-Ua-Platform": "Windows",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
}

# Serialize the body explicitly and POST it with a 30-second timeout.
body = json.dumps(payload)
response = requests.post(url=url, headers=header, data=body, timeout=30)

# Decode the raw response bytes as JSON.
data_json = json.loads(response.content)

Even if I use the cookie and crumb from the original request header, I get this error:

{'code': 'Unauthorized', 'description': 'Invalid Crumb'}

Is this even possible via requests?


Solution

  • It seems you need to set an additional cookie, A1, to get a correct response. You can find the value of this cookie in your browser's Web Developer Tools:

    import requests
    
    api_url = "https://query1.finance.yahoo.com/v1/finance/screener"

    # Screener request body: one filter (region == "us"), 25 results from
    # offset 0, sorted by "intradaymarketcap" in descending order.
    payload = {
        "offset": 0,
        "query": {
            "operands": [
                {
                    "operands": [{"operands": ["region", "us"], "operator": "EQ"}],
                    "operator": "or",
                }
            ],
            "operator": "AND",
        },
        "quoteType": "EQUITY",
        "size": 25,
        "sortField": "intradaymarketcap",
        "sortType": "DESC",
        "topOperator": "AND",
        "userId": "",
        "userIdType": "guid",
    }

    # Query-string parameters. The crumb is sent here, in the URL, rather
    # than in a header.
    # NOTE: the crumb below is a sample value; presumably it must be replaced
    # with the one from your own browser session, together with the A1 cookie.
    params = {
        "crumb": "EwuCwsPbKM2",
        "lang": "en-US",
        "region": "US",
        "formatted": "true",
        "corsDomain": "finance.yahoo.com",
    }

    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/117.0",
    }

    with requests.Session() as s:
        # The A1 cookie value is copied from the browser's Web Developer Tools.
        s.cookies["A1"] = "d=AQABBK8KXmQCEA8-VE0dBLqG5QEpQ7OglmEFEgABCAFH_2QyZfNtb2UB9qMAAAcIqgpeZJj7vK8&S=AQAAAnAOty-NkkMJle5hzDjUjSQ"

        # Always pass a timeout: without one, requests waits indefinitely on
        # a stalled connection.
        response = s.post(
            api_url,
            params=params,
            json=payload,
            headers=headers,
            timeout=30,
        )
        data = response.json()
        print(data)
    

    Prints:

    {
        "finance": {
            "result": [
                {
                    "start": 0,
                    "count": 25,
                    "total": 14459,
                    "quotes": [
                        {
                            "symbol": "AAPL",
                            "twoHundredDayAverageChangePercent": {
                                "raw": 0.07432321,
                                "fmt": "7.43%",
                            },
                            "dividendDate": {
                                "raw": 1692230400,
                                "fmt": "2023-08-16",
                                "longFmt": "2023-08-16T20:00",
                            },
    
    ...