I have a Google search query that shows about 8,000 results with links. I only want to scrape the links (URLs) from the search results. I am able to get the first-page links; is there any method to scrape the next pages? Here is my code:
for page in range(0, 7):
    linkedin_urls = [url.text for url in linkedin_urls]
    #print(linkedin_urls)
    # loop to iterate through all links in the Google search query
    for gol_url in linkedin_urls:
        print(gol_url)
        #driver.get(Xmen_url)
        #sel = Selector(text = driver.page_source)
        sleep(3)
    # go back to the Google search page
    driver.get('https://www.google.com')
    sleep(3)
    # locate the search form by name
    search_query = driver.find_element(By.NAME, 'q')
    sleep(3)
    # input the search terms
    search_query.send_keys('inurl:https://www.ama-assn.org/system/files')
    # simulate the return key
    search_query.send_keys(Keys.RETURN)
    # find and click the "Next" page link in the Google search results
    #Next_Google_page = driver.find_element_by_link_text("Next").click()
    Next_Google_page = driver.find_element(By.LINK_TEXT, "Next").click()
    page += 1
There is no pagination in Google search any more; it uses infinite scrolling instead. You need to scroll to the end of the page and wait for it to load more results automatically, until you reach the point where you have to click 'More results' to see further results.
Here is sample code using Selenium to scroll to the end of a Google search results page.
import time
from selenium import webdriver

search_query_link = 'google_search_query_link'

driver = webdriver.Chrome()
driver.get(search_query_link)

current_height = driver.execute_script("return document.body.scrollHeight")
keep_scrolling = True

while keep_scrolling:
    # scroll to the bottom and give the page time to load more results
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(5)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if current_height == new_height:
        # the page height stopped growing, so no more results are loading
        keep_scrolling = False
    else:
        current_height = new_height
# Your code to extract all the links goes here
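# Illustrative sketch only (not part of the original answer): the CSS selector
# below is an assumption about Google's current result markup and may need
# adjusting. It collects every outbound href from the results container.
from selenium.webdriver.common.by import By

result_urls = []
for anchor in driver.find_elements(By.CSS_SELECTOR, "div#search a"):
    href = anchor.get_attribute("href")
    # keep only external result links, skipping Google's own navigation URLs
    if href and href.startswith("http") and "google." not in href:
        result_urls.append(href)
print(result_urls)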
driver.quit()
You can further wrap this code in a loop that clicks 'More results' every time you encounter it.
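As a rough sketch of that idea (not from the original answer): the 'More results' label and the XPath below are assumptions about Google's current markup, so adjust them to match what the page actually shows. The loop scrolls, tries to click the button on each pass, and stops once the page height no longer grows.
import time
from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get('google_search_query_link')  # placeholder for your search URL, as above

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # scroll to the bottom and give new results time to load
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
    time.sleep(5)
    try:
        # assumed label/XPath for the button Google shows at the end of the results
        driver.find_element(By.XPATH, "//span[text()='More results']").click()
        time.sleep(5)
    except (NoSuchElementException, ElementNotInteractableException):
        pass  # no clickable button on this pass; keep scrolling or finish below
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        # nothing new loaded and no button left to click, so we have reached the end
        break
    last_height = new_height

# extract the links here before quitting, e.g. with the snippet shown earlier
driver.quit()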