I have this URL that I am trying to scrape: https://papemelroti.com/products/live-free-badge
However, I can't seem to find the table with this class in the scraped page:
<table class="hulkapps-table table"><thead><tr><th style="border-top-left-radius: 0px;">Quantity</th><th style="border-top-right-radius: 0px;">Bulk Discount</th><th style="display: none">Add to Cart</th></tr></thead><tbody><tr><td style="border-bottom-left-radius: 0px;">Buy 50 + <span class="hulk-offer-text"></span></td><td style="border-bottom-right-radius: 0px;"><span class="hulkapps-price"><span class="money"><span class="money"> ₱1.00 </span></span> Off</span></td><td style="display: none;"><button type="button" class="AddToCart_0" style="cursor: pointer; font-weight: 600; letter-spacing: .08em; font-size: 11px; padding: 5px 15px; border-color: #171515; border-width: 2px; color: #ffffff; background: #161212;" onclick="add_to_cart(50)">Add to Cart</button></td></tr></tbody></table>
I already have Selenium code, but it still doesn't scrape the table. Here's my code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
# Build the Chrome options used for the browser session.
chrome_options = Options()
for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(flag)

# Adjust the chromedriver path if necessary.
service = Service('/usr/local/bin/chromedriver')
driver = webdriver.Chrome(service=service, options=chrome_options)
def get_page_html(url):
    """Open *url* in the module-level ``driver`` and return its page source."""
    driver.get(url)
    # Fixed pause so JavaScript-driven content has time to load.
    time.sleep(3)
    html = driver.page_source
    return html
def scrape_discount_quantity(url):
    """Return the first (quantity, discount) text pair from the discount table.

    The page is fetched through ``get_page_html`` and searched for a
    ``<table>`` carrying the ``hulkapps-table`` class.

    Args:
        url: Address of the product page to scrape.

    Returns:
        A ``(quantity, discount)`` tuple of stripped cell texts taken from the
        first row that has at least two ``<td>`` cells, or ``(None, None)``
        when the table or such a row is not present in the page source.
    """
    page_html = get_page_html(url)
    soup = BeautifulSoup(page_html, "html.parser")

    # Locate the table containing the quantity and discount.
    table = soup.find('table', class_='hulkapps-table')
    if table is None:
        return None, None

    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # Rows without at least two <td> cells (e.g. <th>-only rows) are skipped.
        if len(cells) >= 2:
            return (
                cells[0].get_text(strip=True),  # quantity text
                cells[1].get_text(strip=True),  # discount text
            )
    return None, None
# Example usage
url = 'https://papemelroti.com/products/live-free-badge'
try:
    quantity, discount = scrape_discount_quantity(url)
    print(f"Quantity: {quantity}, Discount: {discount}")
finally:
    # Close the browser when done, even if scraping raised an exception.
    driver.quit()
It keeps on returning 'None'
The discount data is loaded from the https://volumediscount.hulkapps.com/api/v2/shop/get_offer_table
API endpoint, so BeautifulSoup has no such table to scrape in the page source that Selenium returns via driver.page_source.
I tried your code and confirmed that hulkapps-table
does not exist in the response, so it is expected that the result is None.
I used the https://volumediscount.hulkapps.com/api/v2/shop/get_offer_table
API endpoint together with the product_id
taken from this request: https://papemelroti.com/products/live-free-badge.json
Here is my code; it's basic:
import requests
import json
def getDiscount(root_url):
    """Print the first volume-discount level reported for a product.

    The product id is read from ``<root_url>.json`` and then posted to the
    HulkApps offer-table endpoint; the first entry of the returned offer
    levels is printed.

    Args:
        root_url: Product page URL without the ``.json`` suffix.

    Raises:
        requests.HTTPError: If either request returns an error status.
        requests.Timeout: If either request exceeds the timeout.
    """
    # Without a timeout, requests may wait indefinitely on a stalled server.
    timeout = 10

    # Get product_id
    prod_resp = requests.get(f'{root_url}.json', timeout=timeout)
    prod_resp.raise_for_status()
    prod_id = prod_resp.json()['product']['id']

    disc_url = 'https://volumediscount.hulkapps.com/api/v2/shop/get_offer_table'  # Discount URL
    data = f'pid={prod_id}&store_id=papemelroti.myshopify.com'
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) Gecko/20100101 Firefox/130.0",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    }
    resp = requests.post(disc_url, data=data, headers=headers, timeout=timeout)
    resp.raise_for_status()

    # 'offer_levels' is itself a JSON-encoded string; only its first level is used.
    # NOTE(review): presumably each level is [quantity, amount, discount type],
    # judging by the sample output -- confirm against the API response.
    level = json.loads(resp.json()['eligible_offer']['offer_levels'])[0]
    quantity, amount, kind = level[0], level[1], level[2]

    # The offer has two variants, 'Price' and 'Off', so both are handled in
    # case products other than 'live-free-badge' are scraped.
    if 'price_discount' in kind:
        print(f"Product ID:{prod_id} (Quantity: {quantity}, Discount: {amount} Price discount)")
    elif 'Off' in kind:
        print(f"Product ID:{prod_id} (Quantity: {quantity}, Discount: {amount}% Off)")
# Sample for both 'Off' and 'Price'
for product_url in (
    'https://papemelroti.com/products/dear-me-magnet',
    'https://papemelroti.com/products/live-free-badge',
):
    getDiscount(product_url)
Product ID:7217967726790 (Quantity: 50, Discount: 10% Off)
Product ID:104213217289 (Quantity: 50, Discount: 1.00 Price discount)
Let me know if this works for you, or if you strictly need a Selenium-based solution.