In a previous question, a user suggested the following approach for fetching multiple URLs (API calls) with aiohttp:
import asyncio
import aiohttp
# Two Pushshift comment-search queries (q=Nestle, size=30) over back-to-back
# after/before time windows.
url_list = [
    "https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000",
    "https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000",
]
# Issue a GET for `url` on `session` and (as written, intends to) return the
# 'data' entry of the JSON response body.
async def fetch(session, url):
    async with session.get(url) as response:
        # NOTE(review): the subscript binds tighter than `await`, so ['data'] is
        # applied to whatever response.json() returns *before* it is awaited.
        # Assuming json() returns an awaitable (as in aiohttp), the intended
        # form is presumably `(await response.json())['data']` - confirm.
        return await response.json()['data']
async def fetch_all(session, urls, loop):
    """Run ``fetch`` for every entry of ``urls`` concurrently on ``loop``.

    Each coroutine is wrapped in a task created on ``loop``. Because
    ``return_exceptions=True`` is passed to ``asyncio.gather``, an exception
    raised by a task is returned as that task's list entry instead of being
    propagated.
    """
    tasks = []
    for url in urls:
        tasks.append(loop.create_task(fetch(session, url)))
    return await asyncio.gather(*tasks, return_exceptions=True)
# Script entry point: obtain an event loop, open one client session, run
# fetch_all() to completion on that loop and print whatever it returned.
if __name__=='__main__':
    loop = asyncio.get_event_loop()
    urls = url_list
    # NOTE(review): the session is entered with a plain `with`, while fetch()
    # uses `async with` on session.get(); aiohttp documents ClientSession as an
    # asynchronous context manager. Possibly related to the reported
    # AttributeError('__aexit__') - confirm against the installed aiohttp version.
    with aiohttp.ClientSession(loop=loop) as session:
        htmls = loop.run_until_complete(fetch_all(session, urls, loop))
    print(htmls)
However, this only returns attribute errors:
[AttributeError('__aexit__',), AttributeError('__aexit__',)]
(I enabled returning the exceptions via `return_exceptions=True`; otherwise it would just break.) I really hope somebody here can help with this; it is still kind of hard to find resources for asyncio etc. The returned data is in JSON format. In the end, I would like to put all the JSON dicts in a list.
Working example:
import asyncio
import aiohttp
import ssl
# Two Pushshift comment-search queries (q=Nestle, size=30) over back-to-back
# after/before time windows.
url_list = [
    "https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530396000&before=1530436000",
    "https://api.pushshift.io/reddit/search/comment/?q=Nestle&size=30&after=1530436000&before=1530476000",
]
async def fetch(session, url, *, ssl_context=None):
    """Fetch ``url`` through ``session`` and return the decoded JSON body.

    Args:
        session: An open client session whose ``get(url, **kwargs)`` returns
            an async context manager yielding the response (for example an
            ``aiohttp.ClientSession``).
        url: The URL to request.
        ssl_context: Optional ``ssl.SSLContext`` forwarded to the request as
            ``ssl=...``. When ``None`` (the default) no ``ssl`` argument is
            passed, so the session's own TLS defaults apply.

    Returns:
        Whatever ``await response.json()`` yields for the response.
    """
    # Do not force a bare ssl.SSLContext() here: such a context performs no
    # certificate or hostname verification, and building a new one for every
    # request is wasted work. Callers who really need custom TLS settings can
    # pass their own context explicitly.
    request_kwargs = {} if ssl_context is None else {"ssl": ssl_context}
    async with session.get(url, **request_kwargs) as response:
        return await response.json()
async def fetch_all(urls):
    """Fetch every URL in ``urls`` concurrently over one shared session.

    A single ``aiohttp.ClientSession`` is opened for the whole batch. Because
    ``return_exceptions=True`` is passed to ``asyncio.gather``, a request that
    raises contributes its exception object to the returned list instead of
    aborting the other requests.
    """
    async with aiohttp.ClientSession() as session:
        pending = [fetch(session, link) for link in urls]
        return await asyncio.gather(*pending, return_exceptions=True)
if __name__ == "__main__":
    # Script entry point: let asyncio.run() drive fetch_all() over the
    # module-level URL list, then print the gathered results.
    results = asyncio.run(fetch_all(url_list))
    print(results)