python selenium-webdriver web-scraping css-selectors

Python Selenium script not retrieving product price from a webpage


I'm trying to scrape product prices from the website Ultra Liquors using Python and Selenium, but I'm unable to retrieve the price despite the HTML containing the expected elements. My goal is to compare prices from several shops to find the best deals or any ongoing specials for our venue.

Here's the code I'm using:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# Script from the question: opens a single Ultra Liquors product page in
# headless Chrome and prints the product name and the price, each lookup
# wrapped in its own try/except so one failure does not hide the other.

# Set up Chrome options
options = Options()
options.add_argument('--headless')  # Run in headless mode
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Initialize Chrome driver
service = Service('path_to_chromedriver')  # Replace with your path to chromedriver
driver = webdriver.Chrome(service=service, options=options)

# Open the product page
# NOTE(review): the lookups below run immediately after get() returns; the
# script contains no explicit wait. If the price is filled in by JavaScript
# after the initial load, that could explain the failure -- confirm, and
# consider an explicit wait (WebDriverWait) before locating the element.
driver.get('https://george.ultraliquors.co.za/olof-bergh-olof-bergh-brandy-750ml')

try:
    # Attempt to retrieve product name
    product_name = driver.find_element(By.XPATH, '//h1[@class="product-title"]').text
    print(f"Product Name: {product_name}")
except Exception as e:
    # Any failure (not only "element not found") is reported the same way.
    print(f"Could not locate product name: {e}")

try:
    # Attempt to retrieve price
    # The class name carries a numeric suffix (10677) -- presumably a
    # product-specific id, so this locator would only fit this one product;
    # verify against the page markup.
    price_element = driver.find_element(By.CLASS_NAME, 'price-value-10677')
    price = price_element.text
    print(f"Price: {price}")
except Exception as e:
    print(f"Could not locate price: {e}")

# Close the driver
# NOTE(review): quit() is not in a finally block, so it is skipped if an
# uncaught error is raised earlier (e.g. by driver.get()).
driver.quit()

I expect to get the price value 'R169.99', but the script is not finding it and returns an error message. I've tried using different element locators and checking if the element is dynamically loaded.

I'm using Python 3.12, Selenium 4.8, and ChromeDriver. Any help would be greatly appreciated!


Solution

  • You should use bs4 (BeautifulSoup), because it is faster than Selenium as long as you don't have to deal with bot protection.

    I used the endpoint https://george.ultraliquors.co.za/getFilteredProducts, which accepts a POST request with a JSON body. With it you can fetch all the product prices under the category SPIRITS --> BRANDY. The site has many categories; the categoryId of your target category is 4 (note that the sample code below sends categoryId "3"). Here is the sample code using the bs4 and requests libraries:

    Sample Code:

    import requests
    from bs4 import BeautifulSoup
    import json
    
    # https://www.makro.co.za Result as you said
    
    url = 'https://www.makro.co.za/makhybris/v2/makro/category/JG/search?channelType=WEB&fields=LIGHT&query=:relevance&userType=B2C&pos=M10'

    def getPrice_two(url, timeout=30):
        """Print ``[genericName, basePrice]`` for every item of every Makro category tile.

        The category search at ``url`` is fetched once; each entry of its
        ``facetTileDataList`` supplies a ``code`` that is used as the
        ``categoryId`` of the CategoryListing GraphQL endpoint. Every category
        is then paged through by feeding ``nextPageOffset`` back in as the
        next ``offset``.

        Args:
            url: Category search URL whose JSON response lists the tiles.
            timeout: Seconds to wait on each HTTP request; without one a
                stalled connection would block the script indefinitely.
        """
        listing_url = 'https://www.makro.co.za/wmapi/bff/graphql/CategoryListing/49bf7b2507b2c0ad40dc253614b8d5fb9b1834cb677a6c558aba06f4f399ff9f?channelType=WEB'
        header = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0",
            "Wm_tenant_id": "30"
        }
        tiles = requests.get(url, timeout=timeout).json()['contentSlots']['contentSlot'][0]['components']['component'][0]['facetTileDataList']
        for tile in tiles:
            # One payload per category; only its offset changes between pages.
            payload = {
                "variables": {
                    "categoryId": str(tile['code']),
                    "keyword": "",
                    "filterQuery": {},
                    "offset": 0,
                    "sortBy": "relevance",
                    "sortOrder": "desc",
                    "storeId": "M10",
                    "dynamicPriceRange": True,
                    "customerDetails": {"customerType": "B2C", "targetGroups": []},
                }
            }
            while True:
                # Parse the response body once and reuse it for items and pagination.
                results = requests.post(listing_url, json=payload, headers=header, timeout=timeout).json()['data']['categoryListing']['data']['results']
                for item in results['items']:
                    details = item['itemDetails']
                    print([details['itemInfo']['genericName'], details['price']['basePrice']])
                try:
                    next_offset = results['pagination']['nextPageOffset']
                except TypeError:
                    # 'pagination' is not subscriptable (e.g. null): no further pages.
                    break
                if next_offset is None:
                    # Never post a null offset back to the API; treat it as the last page.
                    break
                payload["variables"]["offset"] = next_offset

    getPrice_two(url)
    
    #https://shop.liquorcity.co.za Result as you said
    
    def getUserId(store_name, timeout=30):
        """Return the ``storefront_user_id`` of the first matching Liquor City store.

        A store matches when ``store_name`` is a substring of its
        ``storepage_slug`` in the marketplace storefront listing.

        Args:
            store_name: Text to look for inside each storefront's slug.
            timeout: Seconds to wait on the HTTP request before giving up.

        Returns:
            The matching store's ``storefront_user_id``, or ``None`` when no
            storefront slug contains ``store_name``.
        """
        url = 'https://shop.liquorcity.co.za/api/marketplace/marketplace_get_city_storefronts_v3?domain_name=shop.liquorcity.co.za&post_to_get=1&marketplace_reference_id=01c838877c7b7c7d9f15b8f40d3d2980&marketplace_user_id=742314&latitude=-26.1192269&longitude=28.0264195&filters=undefined&skip=0&limit=250&self_pickup=1&source=0&dual_user_key=0&language=en'
        storefronts = requests.get(url, timeout=timeout).json()['data']
        for storefront in storefronts:
            if store_name in storefront['storepage_slug']:
                return storefront['storefront_user_id']
        # Make the "not found" outcome explicit rather than falling off the end.
        return None
    
    def getCatagory(store_name, timeout=30):
        """Print three layout lines for every product of a Liquor City store.

        Resolves the store's user id via ``getUserId``, collects the
        ``catalogue_id`` of every category and sub-category returned by the
        catalogue endpoint, then requests the products of each id and prints
        the ``data`` field of the first three entries of each product's
        ``layout_data['lines']``.

        Args:
            store_name: Text identifying the store (matched by ``getUserId``).
            timeout: Seconds to wait on each HTTP request before giving up.

        Raises:
            ValueError: If no storefront matches ``store_name``.
        """
        user_id = getUserId(store_name)
        if user_id is None:
            # Fail with a clear message instead of posting a null user id.
            raise ValueError(f"No Liquor City storefront matches {store_name!r}")

        catalogue_url = 'https://shop.liquorcity.co.za/api/catalogue/get'
        catalogue_request = {"marketplace_user_id":742314,"user_id":user_id,"date_time":"2024-11-04T10:52:56.815Z","show_all_sub_categories":1,"domain_name":"shop.liquorcity.co.za","dual_user_key":0,"language":"en"}
        catalogue = requests.post(catalogue_url, json=catalogue_request, timeout=timeout).json()

        # Parent id first, then its sub-category ids, in response order.
        catalogue_ids = []
        for category in catalogue['data']['result']:
            catalogue_ids.append(category['catalogue_id'])
            catalogue_ids.extend(sub['catalogue_id'] for sub in category['sub_categories'])

        products_url = 'https://shop.liquorcity.co.za/api/get_products_for_category'
        for catalogue_id in catalogue_ids:
            products_request = {"parent_category_id":catalogue_id,"page_no":1,"offset":0,"limit":500,"marketplace_user_id":742314,"user_id":user_id,"date_time":"2024-11-04T11:46:05.410Z","domain_name":"shop.liquorcity.co.za","dual_user_key":0,"language":"en"}
            products = requests.post(products_url, json=products_request, timeout=timeout).json()['data']
            for product in products:
                lines = product['layout_data']['lines']
                print([lines[0]['data'], lines[1]['data'], lines[2]['data']])

    getCatagory('Mosselbay')
    
    # https://www.checkers.co.za - only page 1 is fetched for this demo; add a loop to cover all pages
    
    def getPrice(searchItem, page, timeout=30):
        """Print ``[name, price]`` for each product on one Checkers search page.

        Each element with class ``product-frame`` carries a JSON document in
        its ``data-product-ga`` attribute; the product's ``name`` and
        ``price`` are read from that JSON.

        Args:
            searchItem: Value placed in the ``allCategories`` facet of the query.
            page: Page number inserted into the URL's ``page`` parameter.
            timeout: Seconds to wait on the HTTP request before giving up.
        """
        url = f'https://www.checkers.co.za/c-2256/All-Departments?q=%3Arelevance%3AallCategories%3A{searchItem}%3AbrowseAllStoresFacetOff%3AbrowseAllStoresFacetOff&page={page}'
        header = {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
        }
        html = requests.get(url, headers=header, timeout=timeout).text
        soup = BeautifulSoup(html, 'lxml')
        # find_all is the current Beautiful Soup name; findAll is a legacy alias.
        for frame in soup.find_all(class_='product-frame'):
            product = json.loads(frame['data-product-ga'])
            print([product['name'], product['price']])

    getPrice('drinks', '1')
    
    #https://www.ngf.co.za Result as you said
    
    # Walk the NGF spirits category page by page and print [link text, price]
    # for every product box, stopping at the first page with no product boxes.
    # Paging starts at 1: WordPress-style /page/N/ archives are 1-based, so
    # requesting /page/0/ first would presumably just repeat the first page.
    page = 0
    while True:
        page += 1
        url = f'https://www.ngf.co.za/product-category/spirits/page/{page}/'
        # A timeout keeps a stalled connection from hanging the loop forever.
        html = requests.get(url, timeout=30).text
        soup = BeautifulSoup(html, 'lxml')
        # find_all is the current Beautiful Soup name; findAll is a legacy alias.
        product_boxes = soup.find_all(class_='box-text box-text-products text-center grid-style-2')
        if not product_boxes:
            break
        for box in product_boxes:
            # First <a> holds the product link text, first <bdi> the price text.
            print([box.find('a').text, box.find('bdi').text])
    
    #https://george.ultraliquors.co.za Previous One
    
    # Page through Ultra Liquors' getFilteredProducts endpoint (categoryId "3")
    # and print [product, price, pack size, product URL] for every product box,
    # stopping at the first page that returns no product boxes.
    url = "https://george.ultraliquors.co.za/getFilteredProducts"
    # The headers never change, so build them once outside the loop.
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:131.0) Gecko/20100101 Firefox/131.0"
    }
    page = 0
    while True:
        page += 1
        data = {"categoryId":"3","manufacturerId":"0","vendorId":"0","pageNumber":page,"orderby":"5","viewmode":None,"pagesize":0,"queryString":"","shouldNotStartFromFirstPage":True,"keyword":"","searchCategoryId":"0","searchManufacturerId":"0","searchVendorId":"0","priceFrom":"","priceTo":"","includeSubcategories":"False","searchInProductDescriptions":"False","advancedSearch":"False","isOnSearchPage":"False","inStockFilterModel":None}

        # A timeout keeps a stalled connection from hanging the loop forever.
        html = requests.post(url, json=data, headers=header, timeout=30).text
        soup = BeautifulSoup(html, 'lxml')
        # find_all is the current Beautiful Soup name; findAll is a legacy alias.
        product_boxes = soup.find_all('div', class_='product-item product-box-product-item product-box-grid')
        if not product_boxes:
            break
        for box in product_boxes:
            # The title element supplies both the product name and the relative link.
            title_link = box.find(class_="product-title-product-box")
            product = title_link.text.strip()
            # The price is split across two elements: main amount and cents.
            main_amount = box.find(class_='price actual-price').text.strip()
            cents = box.find(class_='price actual-price-cents').text.strip()
            price = f"{main_amount}.{cents}"
            size = box.find(class_='desktop-product-box-pack-size').text.strip()
            product_url = f"https://george.ultraliquors.co.za{title_link['href']}"
            print([product, price, size, product_url])
    

    Sample output:

    ['1000 POUNDER RUM', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/1000-pounder-1000-pounder-rum-750ml-2']
    ['1000 POUNDER RUM', 'R1479.00', '6 x 750ML', 'https://george.ultraliquors.co.za/1000-pounder-1000-pounder-rum-750ml-x-6']
    ['ABERLOUR 12YR MALT TIN', 'R809.99', '1 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-12yr-malt-tin-750ml-3']
    ['ABERLOUR 12YR MALT TIN', 'R4779.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-12yr-malt-tin-750ml-x-6']
    ['ABERLOUR 16YR MALT TIN', 'R1539.00', '1 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-16yr-malt-tin-750ml']
    ['ABERLOUR 16YR MALT TIN', 'R8419.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aberlour-aberlour-16yr-malt-tin-750ml-x-6']
    ['ABSOLUT VODKA BLUE', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-blue-750ml']
    ['ABSOLUT VODKA BLUE', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-blue-750ml-x-12']
    ['ABSOLUT VODKA GRAPEFRUIT', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-grapefruit-750ml']
    ['ABSOLUT VODKA GRAPEFRUIT', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-grapefruit-750ml-x-12']
    ['ABSOLUT VODKA LIME', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-lime-750ml']
    ['ABSOLUT VODKA LIME', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-lime-750ml-x-12']
    ['ABSOLUT VODKA RASPBERRI', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-raspberri-750ml-2']
    ['ABSOLUT VODKA RASPBERRI', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-raspberri-750ml-x-12-2']
    ['ABSOLUT VODKA WATERMELON', 'R249.99', '1 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-watermelon-750ml']
    ['ABSOLUT VODKA WATERMELON', 'R2949.00', '12 x 750ML', 'https://george.ultraliquors.co.za/absolut-absolut-vodka-watermelon-750ml-x-12']
    ['AERSTONE SINGLE MALT LAND CASK', 'R444.99', '1 x 750ML', 'https://george.ultraliquors.co.za/aerstone-aerstone-single-malt-land-cask-750ml-2']
    ['AERSTONE SINGLE MALT LAND CASK', 'R2619.00', '6 x 750ML', 'https://george.ultraliquors.co.za/aerstone-aerstone-single-malt-land-cask-750ml-x-6']
    

    Let me know if this works for you.