In Python, I want to save an image to a file. The filenames should be hashes generated by `imagehash.average_hash()`. Using `ls -l`, I can see that the files are created, but they are empty:
-rw-r--r-- 1 lorem lorem 0 8 Sep 16:20 c4c0bcb49890bcfc.jpg
-rwxr-xr-x 1 lorem lorem 837 8 Sep 16:19 minimal.py
Code:
import requests
from PIL import Image
import imagehash
import shutil
def safe_to_file(url):
    """Fetch the image at *url* and try to store it as ``<average hash>.jpg``.

    The response is requested in streaming mode, its raw stream is handed
    to PIL to compute ``imagehash.average_hash()``, and the same raw stream
    is then copied into a file named after the hash.

    Returns the hash-based filename, or ``''`` when the status code is not
    200 or the hash could not be computed. Exceptions raised while hashing
    or writing are printed, not propagated.
    """
    image_hash = ''
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    response = requests.get(
        url,
        headers={'user-agent': user_agent},
        timeout=10,
        stream=True,
    )
    try:
        if response.status_code == 200:
            digest = imagehash.average_hash(Image.open(response.raw))
            image_hash = f'{digest}.jpg'
            print(image_hash)
            with open(image_hash, 'wb') as out_file:
                response.raw.decode_content = True
                # NOTE: Image.open() above has already read the response
                # stream, so this copy finds nothing left to write and the
                # file stays empty (the behaviour reported in the question).
                shutil.copyfileobj(response.raw, out_file)
    except Exception as err:
        print(str(err))
    finally:
        # Returning from ``finally`` means the filename is handed back even
        # if an error was raised above.
        return image_hash
# Example run against a random JPEG picture; the filename returned by
# safe_to_file() is not used here.
url = 'https://cdn.ebaumsworld.com/mediaFiles/picture/1035099/85708057.jpg'
safe_to_file(url)
I would expect images which aren't empty. What am I doing wrong?
As I suspected, creating the `PIL.Image` object downloads and consumes all of the image data from the URL, so there is nothing left for `shutil.copyfileobj()` to copy.
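The code below avoids that problem by explicitly saving the `Image` object under the desired hash-based filename. (Note that `Image.save()` re-encodes the picture, so the saved file is not a byte-for-byte copy of the download.) I added comments to indicate the significant changes.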
import imagehash
from PIL import Image
import requests
#import shutil
def safe_to_file(url):
    """Download the image at *url* and save it as ``<average hash>.jpg``.

    The image is decoded with PIL, hashed with ``imagehash.average_hash()``
    and written to the current working directory with ``Image.save()``.
    Saving the ``Image`` object (rather than copying ``r.raw`` again) is
    required because ``Image.open()`` consumes the response stream.

    Args:
        url: Address of the image to download.

    Returns:
        The hash-based filename, or ``''`` when the status code is not 200
        or the image could not be opened/hashed. If saving fails after the
        hash was computed, the filename is still returned.

    Raises:
        Whatever ``requests.get()`` raises (e.g. on timeout); errors while
        decoding, hashing or saving are printed instead of propagated.
    """
    headers = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) '
                             'AppleWebKit/537.36 (KHTML, like Gecko) '
                             'Chrome/53.0.2785.143 Safari/537.36'}
    image_hash = ''
    # The context manager closes the streamed response, releasing the
    # connection even when decoding or saving fails.
    with requests.get(url, headers=headers, timeout=10, stream=True) as r:
        try:
            if r.status_code == 200:
                # Have urllib3 undo any gzip/deflate content-encoding before
                # PIL reads the raw stream.
                r.raw.decode_content = True
                img = Image.open(r.raw)  # ADDED
                image_hash = str(imagehash.average_hash(img)) + '.jpg'  # CHANGED.
                print('saving image:', image_hash)
                img.save(image_hash)  # ADDED
                # REMOVED: the open(image_hash, 'wb') /
                # shutil.copyfileobj(r.raw, f) copy -- r.raw has already been
                # consumed by Image.open() at this point.
        except Exception as ex:
            print(str(ex))
    # Return after the try block rather than from ``finally`` so that
    # KeyboardInterrupt/SystemExit are no longer silently swallowed.
    return image_hash
# Example run against a random JPEG picture; the filename returned by
# safe_to_file() is not used here.
url = 'https://cdn.ebaumsworld.com/mediaFiles/picture/1035099/85708057.jpg'
safe_to_file(url)
`c4c0bcb49890bcfc.jpg` is the file it created: