I'm trying to scrape product prices from the website Ultra Liquors using Python and Selenium, but I'm unable to retrieve the price despite the HTML containing the expected elements. My goal is to compare prices from several shops to find the best deals or any ongoing specials for our venue.
Here's the code I'm using:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Extra Selenium helpers needed for explicit waits and narrower error handling.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Page to scrape and the maximum time (seconds) to wait for each element.
PRODUCT_URL = 'https://george.ultraliquors.co.za/olof-bergh-olof-bergh-brandy-750ml'
WAIT_SECONDS = 10

# Set up Chrome options
options = Options()
options.add_argument('--headless')  # Run in headless mode
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Initialize Chrome driver
service = Service('path_to_chromedriver')  # Replace with your path to chromedriver
driver = webdriver.Chrome(service=service, options=options)

# try/finally guarantees the browser process is shut down even when
# navigation or a lookup raises something unexpected.
try:
    # Open the product page
    driver.get(PRODUCT_URL)

    # find_element() only inspects the DOM as it is at call time; an explicit
    # wait keeps polling, so elements inserted after the initial page load
    # are still found (up to WAIT_SECONDS).
    wait = WebDriverWait(driver, WAIT_SECONDS)

    try:
        # Attempt to retrieve product name
        name_element = wait.until(
            EC.presence_of_element_located((By.XPATH, '//h1[@class="product-title"]'))
        )
        product_name = name_element.text
        print(f"Product Name: {product_name}")
    except WebDriverException as e:
        # Covers TimeoutException from the wait as well as lookup errors.
        print(f"Could not locate product name: {e}")

    try:
        # Attempt to retrieve price
        # NOTE(review): the class name embeds what looks like a per-product
        # id (10677), so this locator presumably only works for this page.
        price_element = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'price-value-10677'))
        )
        price = price_element.text
        print(f"Price: {price}")
    except WebDriverException as e:
        print(f"Could not locate price: {e}")
finally:
    # Close the driver
    driver.quit()
I expect to get the price value 'R169.99', but the script is not finding it and returns an error message. I've tried using different element locators and checking if the element is dynamically loaded.
I'm using Python 3.12, Selenium 4.8, and ChromeDriver. Any help would be greatly appreciated!
You should use requests with bs4 (BeautifulSoup) instead, because it is faster than Selenium (as long as you don't have to deal with bot protection).
I used the endpoint https://george.ultraliquors.co.za/getFilteredProducts, which accepts a POST request with a JSON body. With it you can get all the product prices under the category SPIRITS --> BRANDY; your categoryId is 4.
The site has many categories, and the one you are targeting is number 4. Here is the sample code using the bs4 and requests libraries:
import requests
from bs4 import BeautifulSoup
import json
# https://www.makro.co.za Result as you said
url = 'https://www.makro.co.za/makhybris/v2/makro/category/JG/search?channelType=WEB&fields=LIGHT&query=:relevance&userType=B2C&pos=M10'


def getPrice_two(url, timeout=30):
    """Print [genericName, basePrice] for every item in every Makro category tile.

    Fetches the category search document at ``url``, reads the category codes
    from its ``facetTileDataList``, then pages through the CategoryListing
    GraphQL endpoint for each code, printing one list per item.

    Args:
        url: Category search URL whose JSON response lists the category tiles.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    listing_url = 'https://www.makro.co.za/wmapi/bff/graphql/CategoryListing/49bf7b2507b2c0ad40dc253614b8d5fb9b1834cb677a6c558aba06f4f399ff9f?channelType=WEB'
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0",
        "Wm_tenant_id": "30"
    }

    resp = requests.get(url, timeout=timeout)
    facets = resp.json()['contentSlots']['contentSlot'][0]['components']['component'][0]['facetTileDataList']

    for code in facets:
        offset = 0
        while True:
            # The payload is identical for every page except for "offset".
            payload = {"variables":{"categoryId":f"{code['code']}","keyword":"","filterQuery":{},"offset":offset,"sortBy":"relevance","sortOrder":"desc","storeId":"M10","dynamicPriceRange":True,"customerDetails":{"customerType":"B2C","targetGroups":[]}}}
            listing = requests.post(listing_url, json=payload, headers=header, timeout=timeout).json()
            # Parse the response once and reuse the "results" node.
            results = listing['data']['categoryListing']['data']['results']

            for item in results['items']:
                values = [item['itemDetails']['itemInfo']['genericName'], item['itemDetails']['price']['basePrice']]
                print(values)

            try:
                next_offset = results['pagination']['nextPageOffset']
            except TypeError:
                # "pagination" is not subscriptable (e.g. null): no more pages.
                break
            # A missing or unchanged offset would re-request the same page
            # forever, so treat it as the end of the category as well.
            if next_offset is None or next_offset == offset:
                break
            offset = next_offset


getPrice_two(url)
#https://shop.liquorcity.co.za Result as you said
def getUserId(store_name, timeout=30):
    """Return the storefront user id for a Liquor City store.

    Queries the marketplace storefront listing and returns the
    ``storefront_user_id`` of the first entry whose ``storepage_slug``
    contains ``store_name``.

    Args:
        store_name: Substring to look for in each storefront's slug.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The matching ``storefront_user_id``, or ``None`` when no storefront
        slug contains ``store_name``.
    """
    url = 'https://shop.liquorcity.co.za/api/marketplace/marketplace_get_city_storefronts_v3?domain_name=shop.liquorcity.co.za&post_to_get=1&marketplace_reference_id=01c838877c7b7c7d9f15b8f40d3d2980&marketplace_user_id=742314&latitude=-26.1192269&longitude=28.0264195&filters=undefined&skip=0&limit=250&self_pickup=1&source=0&dual_user_key=0&language=en'
    resp = requests.get(url, timeout=timeout).json()
    for storefront in resp['data']:
        if store_name in storefront['storepage_slug']:
            return storefront['storefront_user_id']
    # Make the "not found" result explicit instead of falling off the end.
    return None
def getCatagory(store_name, timeout=30):
    """Print the first three layout lines of every product in a Liquor City store.

    Resolves the store to a user id, collects the catalogue ids of all
    categories and their sub-categories, then requests the products of each
    catalogue id and prints one list per product.

    Args:
        store_name: Substring of the store's page slug, passed to getUserId.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        ValueError: If no storefront matches ``store_name``.
    """
    user_id = getUserId(store_name)
    if user_id is None:
        # Without a user id the catalogue requests below are meaningless.
        raise ValueError(f"No storefront found for store name {store_name!r}")

    catalogue_url = 'https://shop.liquorcity.co.za/api/catalogue/get'
    data = {"marketplace_user_id":742314,"user_id":user_id,"date_time":"2024-11-04T10:52:56.815Z","show_all_sub_categories":1,"domain_name":"shop.liquorcity.co.za","dual_user_key":0,"language":"en"}
    resp = requests.post(catalogue_url, json=data, timeout=timeout).json()

    # Parent category first, then its sub-categories, in response order.
    catagory_ids = []
    for category in resp['data']['result']:
        catagory_ids.append(category['catalogue_id'])
        for sub in category['sub_categories']:
            catagory_ids.append(sub['catalogue_id'])

    products_url = 'https://shop.liquorcity.co.za/api/get_products_for_category'
    for category_id in catagory_ids:
        # NOTE(review): only page 1 with limit 500 is requested; categories
        # with more products than that would presumably be truncated.
        data_ctgry = {"parent_category_id":category_id,"page_no":1,"offset":0,"limit":500,"marketplace_user_id":742314,"user_id":user_id,"date_time":"2024-11-04T11:46:05.410Z","domain_name":"shop.liquorcity.co.za","dual_user_key":0,"language":"en"}
        resp = requests.post(products_url, json=data_ctgry, timeout=timeout).json()
        for product in resp['data']:
            # Slice instead of indexing 0..2 so a product with fewer than
            # three layout lines prints what it has rather than crashing.
            values = [line['data'] for line in product['layout_data']['lines'][:3]]
            print(values)


getCatagory('Mosselbay')
# https://www.checkers.co.za - Please add a loop over all pages; I only included page 1 for the demo.
def getPrice(searchItem, page, timeout=30):
    """Print [name, price] for each product on one Checkers search-result page.

    Each product tile (class ``product-frame``) carries a JSON blob in its
    ``data-product-ga`` attribute; the name and price are read from there.

    Args:
        searchItem: Search term placed in the ``allCategories`` query facet.
        page: Result page number to request.
        timeout: Seconds to wait on the HTTP request before giving up.
    """
    from urllib.parse import quote

    # Percent-encode the term so spaces, '&', ':' etc. cannot break the
    # already-encoded query string.
    encoded_item = quote(str(searchItem), safe='')
    url = f'https://www.checkers.co.za/c-2256/All-Departments?q=%3Arelevance%3AallCategories%3A{encoded_item}%3AbrowseAllStoresFacetOff%3AbrowseAllStoresFacetOff&page={page}'
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
    }
    resp = requests.get(url, headers=header, timeout=timeout).text
    soup = BeautifulSoup(resp, 'lxml')
    for frame in soup.find_all(class_='product-frame'):
        raw = frame.get('data-product-ga')
        if raw is None:
            # Tile without the analytics payload: nothing to parse.
            continue
        value = json.loads(raw)
        data = [value['name'], value['price']]
        print(data)


getPrice('drinks', '1')
#https://www.ngf.co.za Result as you said
# Walk the paginated spirits category until a page contains no product boxes.
# Paging starts at 1: the /page/N/ scheme is 1-based, and requesting /page/0/
# first would presumably just return the first page a second time.
# NOTE(review): confirm against the site that /page/0/ is not a distinct page.
page = 0
while True:
    page = page + 1
    url = f'https://www.ngf.co.za/product-category/spirits/page/{page}/'
    resp = requests.get(url, timeout=30).text
    soup = BeautifulSoup(resp, 'lxml')
    get_details = soup.find_all(class_='box-text box-text-products text-center grid-style-2')
    if not get_details:
        # No product boxes on this page: we have run past the last page.
        break
    for i in get_details:
        name_tag = i.find('a')
        price_tag = i.find('bdi')
        if name_tag is None or price_tag is None:
            # Skip boxes that lack a link or a price instead of crashing.
            continue
        details = [name_tag.text, price_tag.text]
        print(details)
#https://george.ultraliquors.co.za Previous One
# Page through the getFilteredProducts endpoint until a page has no product
# tiles, printing [product, price, size, product_url] for every tile.
url = "https://george.ultraliquors.co.za/getFilteredProducts"
# The header does not change between pages, so build it once.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
}
page = 0
while True:
    page = page + 1
    data = {"categoryId":"3","manufacturerId":"0","vendorId":"0","pageNumber":page,"orderby":"5","viewmode":None,"pagesize":0,"queryString":"","shouldNotStartFromFirstPage":True,"keyword":"","searchCategoryId":"0","searchManufacturerId":"0","searchVendorId":"0","priceFrom":"","priceTo":"","includeSubcategories":"False","searchInProductDescriptions":"False","advancedSearch":"False","isOnSearchPage":"False","inStockFilterModel":None}
    r = requests.post(url, json=data, headers=header, timeout=30).text
    soup = BeautifulSoup(r, 'lxml')
    html_page = soup.find_all('div', class_='product-item product-box-product-item product-box-grid')
    if not html_page:
        # An empty page means we have gone past the last page of results.
        break
    for i in html_page:
        title_tag = i.find(class_="product-title-product-box")
        rands_tag = i.find(class_='price actual-price')
        cents_tag = i.find(class_='price actual-price-cents')
        size_tag = i.find(class_='desktop-product-box-pack-size')
        if any(tag is None for tag in (title_tag, rands_tag, cents_tag, size_tag)):
            # Incomplete tile: skip it rather than fail on a missing element.
            continue
        product = title_tag.text.strip()
        # The price is rendered as two elements (whole part and cents).
        price = f"{rands_tag.text.strip()}.{cents_tag.text.strip()}"
        size = size_tag.text.strip()
        product_url = f"https://george.ultraliquors.co.za{title_tag['href']}"
        all_details = [product, price, size, product_url]
        print(all_details)
['1000 POUNDER RUM', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/1000-pounder-1000-pounder-rum-750ml-2']
['1000 POUNDER RUM', 'R1479.00', '6 x 750ML', 'https://george.ultraliquors.co.za/1000-pounder-1000-pounder-rum-750ml-x-6']
['ABERLOUR 12YR MALT TIN', 'R809.99', '1 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-12yr-malt-tin-750ml-3']
['ABERLOUR 12YR MALT TIN', 'R4779.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-12yr-malt-tin-750ml-x-6']
['ABERLOUR 16YR MALT TIN', 'R1539.00', '1 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-16yr-malt-tin-750ml']
['ABERLOUR 16YR MALT TIN', 'R8419.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-16yr-malt-tin-750ml-x-6']
['ABSOLUT VODKA BLUE', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-blue-750ml']
['ABSOLUT VODKA BLUE', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-blue-750ml-x-12']
['ABSOLUT VODKA GRAPEFRUIT', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-grapefruit-750ml']
['ABSOLUT VODKA GRAPEFRUIT', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-grapefruit-750ml-x-12']
['ABSOLUT VODKA LIME', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-lime-750ml']
['ABSOLUT VODKA LIME', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-lime-750ml-x-12']
['ABSOLUT VODKA RASPBERRI', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-raspberri-750ml-2']
['ABSOLUT VODKA RASPBERRI', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-raspberri-750ml-x-12-2']
['ABSOLUT VODKA WATERMELON', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-watermelon-750ml']
['ABSOLUT VODKA WATERMELON', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-watermelon-750ml-x-12']
['AERSTONE SINGLE MALT LAND CASK', 'R444.99', '1 x 750ML', 'https://george.ultraliquors.co.za/aerstone-aerstone-single-malt-land-cask-750ml-2']
['AERSTONE SINGLE MALT LAND CASK', 'R2619.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aerstone-aerstone-single-malt-land-cask-750ml-x-6']
Let me know if this works for you.