python json pandas api coinmarketcap

Slow code when fetching data from Coinmarketcap api


I have the code below, which fetches data from the CoinMarketCap API and sends me a Telegram message when certain conditions are met. When I fetch 100 coins the code works fine, but when I fetch 5000 coins it is very slow, and the scheduled API refresh no longer runs at the interval I set in the code.

Can someone see why the code is slow with fetching data from the api with 5000 coins?


A good answer suggests using httpx / asyncio in the code (answer by Pawel Rubin, thanks). Does someone know how I can add asyncio to the code?


from tokens import cmc_token
import json
import re
import requests
from datetime import date
import datetime
from datetime import datetime as dt
from datetime import datetime
from datetime import datetime, timedelta
import schedule
import time
import pandas as pd

# Not referenced anywhere else in the visible code.
lijst = []
# History of (symbol, last_updated, price, platform) tuples appended by tg().
# It lives at module level and is never cleared, so it persists across calls.
price_change2 = []
# Symbols for which tg() has already sent an alert, so each is reported once.
coinlist = []

def tg():
    """Fetch the latest CoinMarketCap listings and send Telegram alerts.

    Each call appends one ``(symbol, last_updated, price, platform)`` row per
    listed coin to the module-level ``price_change2`` history, then computes
    the percentage price change between consecutive rows of the same symbol.
    Every symbol whose change lies strictly between 35 and 100 percent and
    that is not yet in the module-level ``coinlist`` is added to ``coinlist``
    and reported with two Telegram messages.

    The history is turned into a DataFrame once per call, after every row of
    the response has been appended. Rebuilding, sorting and serialising the
    whole frame once per coin made the run time grow quadratically with the
    number of coins, which is why 5000 coins were far slower than 100.

    Returns:
        None.
    """
    token = 'xxxxxxxxxxx'

    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    params = {'start': '1', 'limit': '5000', 'convert': 'usd', 'sort': 'date_added','sort_dir': 'desc' }
    headers = {'X-CMC_PRO_API_KEY': cmc_token}

    d = requests.get(url, headers=headers, params=params).json()

    def send_message_two(text='bla-bla-bla'):
        """POST ``text`` to the Telegram ``sendMessage`` endpoint; return the response."""
        url = f'https://api.telegram.org/bot{token}/sendMessage'
        # NOTE(review): `xxxxxxx` looks like a redacted placeholder for the
        # real chat id -- confirm and substitute before running.
        payload = {'chat_id': xxxxxxx, 'text' : text}

        p = requests.post(url, json=payload)
        return p

    listings = d['data']
    if not listings:
        # Nothing new was fetched, so there is nothing to append or compare.
        return None

    for x in listings:
        # Drop the last character of the timestamp (presumably a trailing
        # "Z") so that datetime.fromisoformat() accepts it.
        new_date1 = datetime.fromisoformat(x['last_updated'][:-1])
        # Shift by a fixed two hours -- presumably a local-time offset; verify.
        new_date_plus1 = new_date1 + timedelta(hours=2)
        new_date_str1 = new_date_plus1.strftime('%Y-%m-%d %H:%M:%S')

        price_change2.append(
            (x['symbol'], new_date_str1, x['quote']['USD']['price'], x['platform'])
        )

    # Build the frame ONCE from the full history instead of once per coin.
    zx = pd.DataFrame(
        price_change2, columns=['coin', 'last_updated', 'price', 'platform']
    ).sort_values(['coin', 'last_updated'])

    # Percentage change versus the previous row of the same symbol; the first
    # row of every symbol has no predecessor and is treated as 0 % change.
    zx['change1'] = zx.groupby('coin')['price'].pct_change().mul(100).fillna(0)

    # Keep only rows inside the alert window. Non-finite changes (for example
    # a division by a zero price) fail this range test and are skipped, rather
    # than aborting the whole scan as the former `return 0` did.
    spikes = zx[(zx['change1'] > 35) & (zx['change1'] < 100)]
    if spikes.empty:
        return None

    # Round-trip through JSON so the values placed in the message are plain
    # Python types, exactly as before.
    parsed = json.loads(spikes.to_json(orient="table"))

    for bn in parsed['data']:
        if bn['coin'] in coinlist:
            continue  # already alerted for this symbol

        rt = bn['coin'], bn['change1'], bn['last_updated'], bn['price'], bn['platform']
        coinlist.append(bn['coin'])
        send_message_two(rt)
        send_message_two('15 min change 35% all time')

    return None


def main():
    """Run ``tg`` every 900 seconds (15 minutes) until the process is stopped.

    The scheduling loop used to sit at module level *above* this function,
    so the ``__main__`` guard below was never reached and ``main`` was an
    empty stub. Hosting the loop here keeps script behaviour the same while
    making the module importable without blocking.
    """
    schedule.every(900).seconds.do(tg)

    # Poll once per second; due jobs are executed by run_pending().
    while True:
        schedule.run_pending()
        time.sleep(1)


if __name__ == '__main__':
    main()

Solution

  • The code is making requests sequentially for every element in parsed['data'].

    Consider running your code asynchronously with some HTTP client that supports asyncio, for example httpx, and use asyncio.gather to run your requests concurrently.

    Consider the following example which makes 50 GET requests to google.com using requests and using httpx. Note that the async solution is significantly faster.

    import asyncio
    import time
    
    import requests
    import httpx
    
    
    def test_sync():
        """Issue 50 blocking GET requests to google.com, one after another."""
        remaining = 50
        while remaining > 0:
            requests.get("https://google.com")
            remaining -= 1
    
    
    async def test_async():
        """Issue 50 GET requests to google.com concurrently via one httpx client."""
        async with httpx.AsyncClient() as session:
            # Create all request coroutines first, then await them together.
            pending = []
            for _ in range(50):
                pending.append(session.get("https://google.com"))
            await asyncio.gather(*pending)
    
    
    if __name__ == "__main__":
        # Use a wall-clock timer: time.process_time() counts CPU time only and
        # excludes time spent waiting on the network, which is exactly what
        # this I/O-bound comparison is meant to measure. The sample figures
        # quoted after this block were produced with process_time().
        start = time.perf_counter()
        test_sync()
        print(f"sync test took {time.perf_counter() - start} s")

        start = time.perf_counter()
        asyncio.run(test_async())
        print(f"async test took {time.perf_counter() - start} s")
    
    
    # sync test took 23.0833 s
    # async test took 0.2662 s