python selenium-webdriver web-scraping beautifulsoup python-requests

web scraper is not grabbing desired text


I am trying to scrape the sku and description on this site: https://www.milwaukeetool.com/products/power-tools/drilling/drill-drivers

However, it won't scrape the desired elements even though the code runs without errors. Does anyone know why? It looks like I am grabbing the correct elements. I have tried using both requests and Selenium (as seen below) and keep getting the same result.

Requests method:

import requests
import pandas as pd
from bs4 import BeautifulSoup

# Scrape SKU / description pairs from the drill-driver listing using a plain
# HTTP GET and write them to milwaukee.csv.
#
# NOTE(review): this only sees the HTML the server returns. If the listing is
# rendered client-side, the selectors below match nothing and the CSV will
# contain just the header row.

link = 'https://www.milwaukeetool.com/products/power-tools/drilling/drill-drivers'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# Selector for the title wrapper of one product card, defined once instead of
# being pasted three times. Raw strings are required: the backslashes escape
# ':' / '[' / ']' for the CSS parser, and in a normal string literal "\:" and
# "\[" are invalid Python escape sequences (SyntaxWarning on Python 3.12+).
CARD_SELECTOR = (
    r"#MTBody > main > section > div.product-listing-main.pt-5.md\:pt-\[30px\]"
    r" > section:nth-child(1) > div > div > div > div:nth-child(2) > div > a"
    r" > div.result-title__wrap.absolute.inset-0.top-auto.bg-gray-300.pt-\[5px\]"
    r".md\:pt-2.px-1.md\:px-4.w-full.text-gray-800.text-center.h-\[75px\]"
)
SKU_SELECTOR = CARD_SELECTOR + " > span"
DESC_SELECTOR = (
    CARD_SELECTOR
    + " > div.text-brandBlack.font-helvetica67.text-14.result-title"
    ".leading-none.max-h-8.overflow-hidden"
)

# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(link, headers=headers, timeout=30)
soup = BeautifulSoup(res.text, "html.parser")

# Collect plain dicts and build the DataFrame once at the end; calling
# pd.concat inside the loop re-copies every previous row on each iteration.
rows = []
for item in soup.select(CARD_SELECTOR):
    sku = item.select_one(SKU_SELECTOR).get_text(strip=True)
    desc = item.select_one(DESC_SELECTOR).get_text(strip=True)
    rows.append({'sku': sku, 'desc': desc})
    print(sku, desc)

# Passing `columns` keeps the 'sku,desc' header even when no rows were found.
df = pd.DataFrame(rows, columns=['sku', 'desc'])
df.to_csv("milwaukee.csv", index=False)

Selenium method:

import undetected_chromedriver as uc

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import time
from selenium.common.exceptions import NoSuchElementException

# Scrape SKU / description pairs from the drill-driver listing with a real
# browser (undetected_chromedriver) and write them to milwtest1.csv.

driver = uc.Chrome()

website = 'https://www.milwaukeetool.com/products/power-tools/drilling/drill-drivers'

LISTING_SELECTOR = ".product-listing-main.pt-5.md\\:pt-\\[30px\\]"
SKU_SELECTOR = '.font-helvetica67.tracking-normal.uppercase.text-gray-900.text-12.result-sku.leading-none'
DESC_SELECTOR = '.text-brandBlack.font-helvetica67.text-14.result-title.leading-none.max-h-8.overflow-hidden'

prod_num = []
prod_desc = []

# try/finally guarantees the browser is closed even if a wait or lookup fails.
try:
    driver.get(website)

    # Block until the listing container exists so the page has started rendering.
    WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, LISTING_SELECTOR))
    )

    # Scroll BEFORE reading the products: scrolling after the data has already
    # been collected cannot add anything to the result.
    # NOTE(review): assumes further products may load on scroll - confirm.
    for _ in range(4):
        driver.execute_script("window.scrollBy(0, 2000);")
        time.sleep(2)

    # Re-locate the containers after scrolling so the references are fresh.
    product_list = driver.find_elements(By.CSS_SELECTOR, LISTING_SELECTOR)

    for container in product_list:
        # find_element (singular) returns only the FIRST match, which yields a
        # single product per container; find_elements returns every match.
        skus = container.find_elements(By.CSS_SELECTOR, SKU_SELECTOR)
        descriptions = container.find_elements(By.CSS_SELECTOR, DESC_SELECTOR)
        # NOTE(review): pairing by position assumes every product card has
        # both a SKU and a title element - verify against the live markup.
        for sku, description in zip(skus, descriptions):
            prod_num.append(sku.text)
            prod_desc.append(description.text)
finally:
    driver.quit()

print(len(prod_num))
print(len(prod_desc))

# Create a DataFrame from the scraped data
df = pd.DataFrame({'code': prod_num, 'description': prod_desc})

# Save the DataFrame to a CSV file
df.to_csv('milwtest1.csv', index=False)

print(df)

Solution

  • The page is dynamically rendered, so using requests alone is not going to work.

    There is an API behind the site but it uses a category ID to retrieve the data. So my approach would have two parts:

    1. use the web page to retrieve the category ID (the page itself doesn't contain the actual product data, because that is loaded dynamically) and
    2. use the API to retrieve the data.
    import re
    import requests
    from pandas import DataFrame
    
    # Two-step scrape: read the category ID out of the listing page's HTML,
    # then ask the site's product-listing API for every product in it.

    URL = "https://www.milwaukeetool.com/products/power-tools/drilling/drill-drivers"
    API_URL = 'https://www.milwaukeetool.com/api/v1/products/listing'
    TIMEOUT = 30  # seconds; without it a stalled connection blocks forever

    # GET CATEGORY ID -------------------------------------------------------------

    response = requests.get(URL, timeout=TIMEOUT)
    # Fail on HTTP errors here rather than on a confusing regex miss below.
    response.raise_for_status()

    # Matches the literal text \"page_id\":\"{<id>}\" (backslash-escaped quotes
    # included) and captures the id: an 8-4-4-4-12 group of word characters.
    pattern = r'\\\"page_id\\\":\\\"{(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})}\\\"'

    match = re.search(pattern, response.text)
    if match is None:
        # re.search returns None on a miss; calling .groups() on it would only
        # raise an opaque AttributeError.
        raise RuntimeError(f"Could not find a page_id in the HTML of {URL}")
    category_id = match.group(1)

    # PULL CATEGORY FROM API ------------------------------------------------------

    data = {
        'language': 'en',
        'returnAll': True,
        'categories': category_id,
    }

    response = requests.post(API_URL, json=data, timeout=TIMEOUT)
    response.raise_for_status()

    results = response.json()["data"]["results"]

    # Keep only the two fields of interest; `columns` fixes the column order
    # and keeps the headers even if the API returns no results.
    products = DataFrame(
        [
            {"sku": result["sku"], "description": result["title"]}
            for result in results
        ],
        columns=["sku", "description"],
    )
    print(products)
    

    Results look like this:

            sku                                        description
    0   2905-20              M18 FUEL™ ½” Drill/Driver w/ ONE-KEY™
    1   2903-20                        M18 FUEL™ 1/2" Drill/Driver
    2   3403-20                        M12 FUEL™ 1/2" Drill/Driver
    3   2505-20    M12 FUEL™ Installation Drill/Driver (Tool-Only)
    4   2803-20            M18 FUEL™ 1/2" Drill Driver (Tool Only)
    5   2804-20       M18 FUEL™ ½” Hammer Drill/Driver (Tool Only)
    6   2503-20            M12 FUEL™ 1/2" Drill Driver (Tool Only)
    7   2810-20   M18 FUEL™ Mud Mixer with 180° Handle (Tool Only)
    8   3602-20    M18™ Compact Brushless 1/2" Hammer Drill/Driver
    9   3601-20          M18™ Compact Brushless 1/2" Drill/ Driver
    10  2902-20        M18 1/2" Brushless Hammer Drill (Tool Only)
    11  2801-20  M18 Compact Brushless 1/2" Drill Driver Bare Tool
    12  2407-20                 M12™ 3/8” Drill/Driver (Tool Only)
    13  2606-20                             M18™ 1/2" Drill Driver
    14  2615-20                             M18™ Right Angle Drill
    15  0726-20        M28™ Cordless 1/2" Hammer Drill (Tool Only)
    16   1107-6                       1/2 D-Handle Drill 0-500 RPM
    17   1610-1                         1/2" Compact Drill 650 RPM
    18   0244-1                     1/2"  Magnum® Drill, 0-700 RPM
    19  0101-20  1/4" Magnum® Drill, 0-4000 RPM with QUIK-LOK® ...
    20  0302-20  1/2"  Magnum® Drill, 0-850 RPM with All Metal ...
    21  0233-20  3/8" Magnum® Drill, 0-2800 RPM with Keyless Chuck
    22   0234-6                     1/2"  Magnum® Drill, 0-950 RPM
    23   1660-6                         1/2" Compact Drill 450 RPM
    24  0100-20  1/4" Magnum® Drill, 0-2500 RPM with QUIK-LOK® ...
    25   1101-1                         1/2 D-Handle Drill 500 RPM
    26  0200-20                     3/8" Magnum® Drill, 0-1200 RPM
    27  0240-20                                         3/8" Drill
    28   1630-1                        1/2"  Compact Drill 900 RPM
    29   1007-1                       1/2 D-Handle Drill 0-600 RPM
    30  0370-20                    3/8"  Close Quarter Angle Drill
    31  0299-20                    1/2"  Magnum®  Drill, 0-850 RPM
    32  0201-20  3/8" Magnum® Drill, 0-2500 RPM with All Metal ...
    33   1001-1                   1/2 in. D-Handle Drill 0-600 RPM
    34  0202-20  3/8" Magnum®  Drill, 0-1200 RPM with All Metal...
    35  0235-21  1/2"  Magnum®  Drill, 0-950 RPM with All Metal...
    36  0300-20                      1/2" Magnum® Drill, 0-850 RPM