I need to calculate the hash of images that I download from a CDN server. With synchronous libraries everything works well: I pass the raw response stream (res) directly to Image.open() and get an image object (img) back.
import requests
import imagehash
from PIL import Image
# sync
def get_picture():
    """Download one image synchronously and print its phash.

    The response is streamed and its raw body is handed straight to
    ``Image.open()``, so nothing is written to disk.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
    # With stream=True the connection is only released once the response is
    # closed, so use the response as a context manager.
    with requests.get(url, stream=True) as res:
        # Fail early on 4xx/5xx instead of feeding an error page to Pillow.
        res.raise_for_status()
        # Ask urllib3 to undo any Content-Encoding on the raw stream.
        res.raw.decode_content = True
        img = Image.open(res.raw)
        # Hash while the response is still open: Pillow reads pixel data lazily.
        image_hash = imagehash.phash(img, hash_size=6)
    print(f'hash: {image_hash}')
However, I need to check a lot of photos, so the requests have to be executed asynchronously. Here I ran into a problem: I could not pass the response with the image directly to Image.open(). It only works if I first save the image to a file, but that option is not suitable for me (I don't need files on the server).
import asyncio

import imagehash
from aiohttp import ClientSession
from PIL import Image

from config.settings import BASE_DIR
# async
async def get_aio_picture():
    """Download one image to ``BASE_DIR/picture.jpg`` and print its phash.

    This is the file-based variant: the body is streamed to disk and then
    re-opened with Pillow.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error status.
    """
    url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
    # Write to and read from the SAME location. Writing to the cwd-relative
    # 'picture.jpg' and reading from BASE_DIR only works when cwd == BASE_DIR.
    picture_path = f'{BASE_DIR}/picture.jpg'
    async with ClientSession() as session:
        async with session.get(url=url) as response:
            # Fail early on 4xx/5xx instead of saving an error page as a JPEG.
            response.raise_for_status()
            # NOTE(review): aiohttp documents auto_decompress as a
            # ClientSession()/request option; setting it on the response
            # presumably has no effect - confirm and drop if so.
            response.auto_decompress = False
            # NOTE: this is blocking file I/O inside a coroutine.
            with open(picture_path, 'wb') as fd:
                # Binary data has no meaningful "lines"; read fixed-size chunks.
                async for chunk in response.content.iter_chunked(64 * 1024):
                    fd.write(chunk)
    # The context manager closes the image file once the hash is computed.
    with Image.open(picture_path) as img:
        image_hash = imagehash.phash(img, hash_size=6)
    print(f'hash: {image_hash}')
async def main():
    """Schedule ``get_aio_picture()`` as an asyncio task and wait for it."""
    await asyncio.create_task(get_aio_picture())
def aio_hash():
    """Synchronous entry point: run ``main()`` to completion with ``asyncio.run``."""
    entry_point = main()
    asyncio.run(entry_point)
If I change the function so that the downloaded data is passed directly to Image.open():
# Failing variant, kept as-is to illustrate the error reported with it.
# It passes each item yielded by iterating response.content straight to
# Image.open() instead of a path or a file-like object.
async def get_aio_picture():
url = 'https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg'
async with ClientSession() as session:
async with session.get(url=url) as response:
response.auto_decompress = False
async for line in response.content:
# `line` is a piece of the binary body. As the reported traceback shows,
# Image.open() ends up calling builtins.open() on it as if it were a
# filename, which raises "ValueError: embedded null byte".
img = Image.open(line)
hash = imagehash.phash(img, hash_size=6)
print(f'hash: {hash}')
Then I get the error :(
...
File "/home/.../main/services/aio/help.py", line 37, in get_aio_picture
img = Image.open(line)
File "/home/.../env/lib/python3.10/site-packages/PIL/Image.py", line 3247, in open
fp = builtins.open(filename, "rb")
ValueError: embedded null byte
How can I change the code to avoid creating files on disk? Help me please!
You can read the whole response body with await response.read() and wrap the resulting bytes in an in-memory buffer using io.BytesIO. Image.open() accepts this file-like buffer, so the hash can be generated without writing anything to disk.
import asyncio
import imagehash
from PIL import Image
from aiohttp import ClientSession
import io
# async
async def get_aio_picture(
    url='https://ae01.alicdn.com/kf/Sec174725eb944b4693342178da975d52z.jpg',
):
    """Download an image into memory, print its phash and return it.

    Nothing is written to disk: the response body is wrapped in an
    ``io.BytesIO`` buffer, which ``Image.open()`` accepts as a file object.

    Args:
        url: Address of the image to hash. Defaults to the sample picture
            so existing zero-argument calls keep working.

    Returns:
        The value produced by ``imagehash.phash(img, hash_size=6)``.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an error status.
    """
    async with ClientSession() as session:
        async with session.get(url=url) as response:
            # Fail early on 4xx/5xx instead of feeding an error page to Pillow.
            response.raise_for_status()
            # NOTE(review): aiohttp documents auto_decompress as a
            # ClientSession()/request option; setting it on the response
            # presumably has no effect - confirm and drop if so.
            response.auto_decompress = False
            body = await response.read()
    # The body is fully in memory, so the connection is released before hashing.
    img = Image.open(io.BytesIO(body))
    image_hash = imagehash.phash(img, hash_size=6)
    print(f'hash: {image_hash}')
    return image_hash
async def main():
    """Schedule ``get_aio_picture()`` as an asyncio task and wait for it."""
    await asyncio.create_task(get_aio_picture())
def aio_hash():
    """Synchronous entry point: run ``main()`` to completion with ``asyncio.run``."""
    entry_point = main()
    asyncio.run(entry_point)
# Run the example: downloads the image and prints its hash.
aio_hash()
Outputs:
hash: fbc843946