I have the following working test solution, which outputs the IP address and related information:
Now I want to use it with my ScraperAPI account and its proxies. But when I uncomment these two lines:
# PROXY = f'http://scraperapi:{SCRAPER_API}@proxy-server.scraperapi.com:8001'
# options.add_argument('--proxy-server=%s' % PROXY)
the solution stops working.
How can I use my proxies with Selenium in this code? (ScraperAPI recommends the selenium-wire module, but I would rather avoid it because it depends on specific versions of other tools, so I would like to use the proxies without it.)
Is this possible?
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from sys import platform
import os, sys
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from fake_useragent import UserAgent
from dotenv import load_dotenv, find_dotenv
WAIT = 10
load_dotenv(find_dotenv())
SCRAPER_API = os.environ.get("SCRAPER_API")
# PROXY = f'http://scraperapi:{SCRAPER_API}@proxy-server.scraperapi.com:8001'
srv=Service(ChromeDriverManager().install())
ua = UserAgent()
userAgent = ua.random
options = Options()
options.add_argument('--headless')
options.add_experimental_option ('excludeSwitches', ['enable-logging'])
options.add_argument("start-maximized")
options.add_argument('window-size=1920x1080')
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument(f'user-agent={userAgent}')
# options.add_argument('--proxy-server=%s' % PROXY)
path = os.path.abspath (os.path.dirname (sys.argv[0]))
if platform == "win32": cd = '/chromedriver.exe'
elif platform == "linux": cd = '/chromedriver'
elif platform == "darwin": cd = '/chromedriver'
driver = webdriver.Chrome (service=srv, options=options)
waitWebDriver = WebDriverWait (driver, 10)
link = "https://whatismyipaddress.com/"
driver.get (link)
time.sleep(WAIT)
soup = BeautifulSoup (driver.page_source, 'html.parser')
tmpIP = soup.find("span", {"id": "ipv4"})
tmpP = soup.find_all("p", {"class": "information"})
for e in tmpP:
    tmpSPAN = e.find_all("span")
    for e2 in tmpSPAN:
        print(e2.text)
print(tmpIP.text)
driver.quit()
There are a couple of things to look at here:
First of all, there is a small typo in driver.get (link): a stray space between get and (link). Python tolerates the space, so it will not by itself raise an error such as:
IndexError: list index out of range
Still, it is worth cleaning up for readability.
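If you want to convince yourself that the space is only cosmetic, a quick plain-Python sketch (the get function here is just a stand-in, not Selenium's):

def get(url):
    # stand-in for driver.get(); only returns a string
    return f"fetching {url}"

print(get ("https://example.com"))  # space before the parentheses: same call
print(get("https://example.com"))   # preferred spelling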
The following lines load environment variables from a local .env file into os.environ; the script runs fine without them, so while you are not using the proxy you can comment them out:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
If you want to stop using SCRAPER_API, comment out the following line as well:
SCRAPER_API = os.environ.get("SCRAPER_API")
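For reference, a minimal self-contained sketch of how those dotenv lines behave (the .env file name and SCRAPER_API key are the ones from your script; the fallback message is only for illustration):

# .env file in the project directory, containing e.g.:
# SCRAPER_API=your_scraperapi_key
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())                    # locate .env (searching upwards) and load it
SCRAPER_API = os.environ.get("SCRAPER_API")   # None if the variable is not set
print(SCRAPER_API or "SCRAPER_API is not set")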
Making those minor tweaks and optimizing your code:
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
WAIT = 10
srv = Service(ChromeDriverManager().install())
ua = UserAgent()
userAgent = ua.random
options = Options()
options.add_argument('--headless')
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_argument("start-maximized")
options.add_argument('window-size=1920x1080')
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument(f'user-agent={userAgent}')
driver = webdriver.Chrome(service=srv, options=options)
waitWebDriver = WebDriverWait(driver, 10)
link = "https://whatismyipaddress.com/"
driver.get(link)
driver.save_screenshot("whatismyipaddress.png")
time.sleep(WAIT)
soup = BeautifulSoup(driver.page_source, 'html.parser')
tmpIP = soup.find("span", {"id": "ipv4"})
tmpP = soup.find_all("p", {"class": "information"})
for e in tmpP:
    tmpSPAN = e.find_all("span")
    for e2 in tmpSPAN:
        print(e2.text)
print(tmpIP.text)
driver.quit()
Console Output:
[WDM] -
[WDM] - ====== WebDriver manager ======
[WDM] - Current google-chrome version is 96.0.4664
[WDM] - Get LATEST driver version for 96.0.4664
[WDM] - Driver [C:\Users\Admin\.wdm\drivers\chromedriver\win32\96.0.4664.45\chromedriver.exe] found in cache
ISP:
Jio
City:
Pune
Region:
Maharashtra
Country:
India
123.12.234.23
Saved Screenshot: whatismyipaddress.png (image omitted)

To route the traffic through your ScraperAPI proxy, put the dotenv/SCRAPER_API lines back in and enable the --proxy-server argument:
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
import os
from dotenv import load_dotenv, find_dotenv
WAIT = 10
load_dotenv(find_dotenv())
SCRAPER_API = os.environ.get("SCRAPER_API")
PROXY = f'http://scraperapi:{SCRAPER_API}@proxy-server.scraperapi.com:8001'
srv = Service(ChromeDriverManager().install())
ua = UserAgent()
userAgent = ua.random
options = Options()
options.add_argument('--headless')
options.add_experimental_option('excludeSwitches', ['enable-logging'])
options.add_argument("start-maximized")
options.add_argument('window-size=1920x1080')
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
options.add_argument(f'user-agent={userAgent}')
options.add_argument('--proxy-server={}'.format(PROXY))
driver = webdriver.Chrome(service=srv, options=options)
waitWebDriver = WebDriverWait(driver, 10)
link = "https://whatismyipaddress.com/"
driver.get(link)
driver.save_screenshot("whatismyipaddress.png")
time.sleep(WAIT)
soup = BeautifulSoup(driver.page_source, 'html.parser')
tmpIP = soup.find("span", {"id": "ipv4"})
tmpP = soup.find_all("p", {"class": "information"})
for e in tmpP:
    tmpSPAN = e.find_all("span")
    for e2 in tmpSPAN:
        print(e2.text)
print(tmpIP.text)
driver.quit()
Note: print the assembled proxy URL, i.e.
print(f'http://scraperapi:{SCRAPER_API}@proxy-server.scraperapi.com:8001')
and ensure that SCRAPER_API actually contains your key. Also be aware that plain Chrome ignores the username:password part of a --proxy-server URL, so a proxy that requires authentication can still fail even when the URL is correct; in that case you need a different authentication mechanism (for example IP whitelisting, if your provider supports it).
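If you want to verify the credentials before launching Chrome at all, a small sketch along these lines can help (it assumes the requests package is installed and uses http://httpbin.org/ip as a neutral echo service; both are only for illustration):

import os
import requests
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())
SCRAPER_API = os.environ.get("SCRAPER_API")
if not SCRAPER_API:
    raise SystemExit("SCRAPER_API is not set - check your .env file")

PROXY = f'http://scraperapi:{SCRAPER_API}@proxy-server.scraperapi.com:8001'
# requests, unlike plain Chrome, honours the credentials embedded in the proxy URL
resp = requests.get('http://httpbin.org/ip',
                    proxies={'http': PROXY, 'https': PROXY},
                    timeout=30)
print(resp.json())  # should report a ScraperAPI IP, not your own

If this prints your own IP or raises a proxy authentication error, fix the key or URL first; the Selenium script cannot work until this check passes.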