python json image google-image-search

Download pictures from google images using python


I have a pretty big JSON file with a lot of car brands and models (more than 1000 cars), and the JSON file looks like this:

{
"0":"honda_civic",
"1":"nissan_leaf",
...
"id":"brand_model"
}

and I wonder if I can save photos from Google and put them into the corresponding folders (the path for the first element is honda/civic).

My question: is there any function that can get the first result photo from Google and save it locally? (It would be great if it could also filter for photos that are labeled with permission to reuse.)


Solution

  • This code did the trick for me :

    import json
    import os
    import time
    import urllib.request

    from selenium import webdriver
    from selenium.webdriver.common.keys import Keys

    # Load the id -> search phrase mapping once at start-up. The context
    # manager closes the file handle as soon as parsing is done instead of
    # leaving it open for the lifetime of the script.
    with open('id.json','rb') as id_file:
        myjson=json.load(id_file)
    print("starting")
    # NOTE(review): not read anywhere in the visible code; kept in case other
    # code relies on it.
    i=0
    def searchnsave(id,key_word):
        """Download the first Google Images thumbnail found for ``key_word``.

        Starts a Chrome session, opens a Google Images results page for
        ``key_word`` and saves the first ``<img>`` inside the ``islmp``
        container that carries a ``src`` attribute to
        ``downloads/<key_word>.jpg``. Prints ``fail`` when no such image is
        found.

        Args:
            id: Identifier of the entry being processed. Not used by this
                function; accepted so existing callers keep working.
            key_word: Search phrase. Also used verbatim as the output file
                name (without extension).

        Exceptions raised by Selenium or by the download propagate to the
        caller; the browser session is shut down in every case.
        """
        # Local import: only this function needs it.
        from urllib.parse import quote_plus

        # Encode the phrase so spaces, '&', '#', etc. cannot break the URL.
        query = quote_plus(key_word)
        # urlretrieve does not create missing directories.
        os.makedirs('downloads', exist_ok=True)
        target = os.path.join('downloads',key_word+'.jpg')

        # NOTE(review): this is the Selenium 3 calling style (driver path as
        # first argument, find_element_by_* locators) - confirm the installed
        # Selenium version still supports it.
        browser = webdriver.Chrome("C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe") #incase you are chrome
        try:
            # NOTE(review): the "tbs=sur%3Afc" parameter is presumably the
            # usage-rights filter asked for in the question - confirm.
            browser.get("https://www.google.com/search?q="+query+"&tbm=isch&ved=2ahUKEwi2hvndl-jqAhWSkqQKHffTA5sQ2-cCegQIABAA&oq="+query+"&gs_lcp=CgNpbWcQAzIECAAQEzIECAAQEzIECAAQEzIECAAQEzIECAAQE1D2FVj2FWDHG2gAcAB4AIABUogBUpIBATGYAQCgAQGqAQtnd3Mtd2l6LWltZ8ABAQ&sclient=img&ei=LAgcX7bAM5KlkgX3p4_YCQ&bih=722&biw=1519&tbs=sur%3Afc&hl=ro")
            results = browser.find_element_by_id('islmp')
            for image in results.find_elements_by_tag_name("img"):
                src = image.get_attribute('src')
                if src is None:
                    # No src on this <img>; try the next one rather than
                    # giving up on the whole search.
                    continue
                print(src)
                urllib.request.urlretrieve(src, target)
                break
            else:
                # Loop finished without finding any image that has a src.
                print('fail')
        finally:
            # quit() ends the WebDriver session; running it in finally keeps
            # a failed search from leaving a browser behind.
            browser.quit()
    
    
    # Run one search-and-download per JSON entry, echoing each entry first.
    for entry_id, search_term in myjson.items():
        print(entry_id, search_term)
        searchnsave(entry_id, search_term)
    

    But don't forget to install Selenium first, with the following command:

    pip3 install selenium
    

    and also install the correct chromedriver.exe from here: http://chromedriver.storage.googleapis.com/index.html