python, screen-scraping

AttributeError: 'NoneType' object has no attribute 'text' when scraping eBay product titles


While following this tutorial to create an eBay price tracker with Python, I get an AttributeError: 'NoneType' object has no attribute 'text' when trying to read the title of a product from an eBay search results page.

The class is the right one, as you can see here:

'title': item.find('h3', {'class': 's-item__title s-item__title--has-tags'}).text,

Any idea of why I am getting this error and how to bypass it?

Here is the entire code:

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Search keywords: sent as the search query and used as the prefix of the CSV file name.
searchterm = 'screen'

def get_data(searchterm):
    """Download the eBay search results page for *searchterm* and parse it.

    Args:
        searchterm: Keywords to search for; sent as the ``_nkw`` query
            parameter.

    Returns:
        A ``BeautifulSoup`` tree of the response body, built with
        ``html.parser``.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status.
        requests.exceptions.Timeout: No response arrived within 30 seconds.
    """
    # Passing the query as ``params`` lets requests URL-encode the search
    # term, so characters such as '&' or '#' cannot corrupt the query string.
    params = {
        '_from': 'R40',
        '_trksid': 'p2380057.m570.l1313',
        '_nkw': searchterm,
        '_sacat': '0',
        'LH_PrefLoc': '1',
        'LH_Auction': '1',
        'rt': 'nc',
        'LH_Sold': '1',
        'LH_Complete': '1',
    }
    # Without a timeout requests can block forever on a stalled connection.
    r = requests.get('https://www.ebay.com/sch/i.html', params=params, timeout=30)
    # Fail loudly instead of parsing an error page into an empty result set.
    r.raise_for_status()
    return BeautifulSoup(r.text, 'html.parser')

def parse(soup):
    """Build one dict per search-result block found in *soup*.

    Every ``div`` with class ``s-item__info clearfix`` is treated as one
    product; its title, sold price, sold date, bid count and link are read
    into a dict, and the list of those dicts is returned.
    """
    productslist = []
    results = soup.find_all('div',{'class': 's-item__info clearfix'})
    for item in results:
        # Every lookup below uses the result of find() directly, without
        # checking whether a matching tag was actually found.
        product = {
            'title': item.find('h3', {'class': 's-item__title s-item__title--has-tags'}).text,
            # Price text has '$' and thousands separators removed before float().
            'soldprice': float(item.find('span', {'class': 's-item__price'}).text.replace('$','').replace(',','').strip()),
            # NOTE(review): `.text` is attached to the dict literal
            # {'class': 'POSITIVE'} here, not to the result of the inner find().
            'solddate': item.find('span', {'class': 's-item__title--tagblock__COMPLETED'}).find('span',{'class': 'POSITIVE'}.text),
            'bids': item.find('span', {'class': 's-item__bids'}).text,
            'link': item.find('a', {'class': 's-item__link'})['href'],
        }
        productslist.append(product)
    return productslist

def output(productslist, searchterm):
    """Save *productslist* as ``<searchterm>ebaytrackeroutput.csv``.

    The rows are loaded into a pandas DataFrame and written without the
    index column, relative to the current working directory. A confirmation
    line is printed once the file has been written.
    """
    frame = pd.DataFrame(productslist)
    destination = searchterm + 'ebaytrackeroutput.csv'
    frame.to_csv(destination, index=False)
    print('Saved to CSV')

# Run the pipeline: download the results page, extract the products, save them as CSV.
soup = get_data(searchterm)
productslist = parse(soup)
output(productslist, searchterm)

                           

Thank you for your help!


Solution

  • Some items have no title or sold price. For those, item.find(...) returns None, and calling .text on None raises this error, so you need to skip them.

    Also, item.find('span', {'class': 's-item__title--tagblock__COMPLETED'}) always returns None, so you need to check why.

    Further reading: How to debug small programs

    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    
    # Search keywords: sent as the search query and used as the prefix of the CSV file name.
    searchterm = 'screen'
    
    def get_data(searchterm):
        """Download the eBay search results page for *searchterm* and parse it.

        Args:
            searchterm: Keywords to search for; sent as the ``_nkw`` query
                parameter.

        Returns:
            A ``BeautifulSoup`` tree of the response body, built with
            ``html.parser``.

        Raises:
            requests.exceptions.HTTPError: The server answered with a 4xx/5xx
                status.
            requests.exceptions.Timeout: No response arrived within 30 seconds.
        """
        # Passing the query as ``params`` lets requests URL-encode the search
        # term, so characters such as '&' or '#' cannot corrupt the query string.
        params = {
            '_from': 'R40',
            '_trksid': 'p2380057.m570.l1313',
            '_nkw': searchterm,
            '_sacat': '0',
            'LH_PrefLoc': '1',
            'LH_Auction': '1',
            'rt': 'nc',
            'LH_Sold': '1',
            'LH_Complete': '1',
        }
        # Without a timeout requests can block forever on a stalled connection.
        r = requests.get('https://www.ebay.com/sch/i.html', params=params, timeout=30)
        # Fail loudly instead of parsing an error page into an empty result set.
        r.raise_for_status()
        return BeautifulSoup(r.text, 'html.parser')
    
    def _node_text(node):
        """Return ``node.text``, or ``None`` when the lookup found no tag."""
        return node.text if node is not None else None

    def _parse_price(raw):
        """Convert a price string such as '$1,234.50' to a float, or ``None`` if it is not a single number."""
        try:
            return float(raw.replace('$', '').replace(',', '').strip())
        except ValueError:
            return None

    def parse(soup):
        """Extract one dict per usable search-result block found in *soup*.

        Every ``div`` with class ``s-item__info clearfix`` is examined. Blocks
        that lack a title or a price tag, or whose price text is not a single
        number (for example a price range), are skipped.

        Args:
            soup: Parsed results page; must provide ``find_all`` and yield
                items that provide ``find``.

        Returns:
            A list of dicts with the keys ``title``, ``soldprice`` (float),
            ``solddate``, ``bids`` and ``link``. The last three are ``None``
            when the corresponding tag is missing from the block.
        """
        productslist = []
        for item in soup.find_all('div', {'class': 's-item__info clearfix'}):
            title = item.find('h3', {'class': 's-item__title s-item__title--has-tags'})
            price_tag = item.find('span', {'class': 's-item__price'})
            # find() yields None for a missing tag; a row without title or price is unusable.
            if title is None or price_tag is None:
                continue
            soldprice = _parse_price(price_tag.text)
            if soldprice is None:
                continue
            # The date lives in a nested span; guard the outer lookup before
            # searching inside it, and take .text from the inner result.
            tagblock = item.find('span', {'class': 's-item__title--tagblock__COMPLETED'})
            date_tag = None
            if tagblock is not None:
                date_tag = tagblock.find('span', {'class': 'POSITIVE'})
            link_tag = item.find('a', {'class': 's-item__link'})
            productslist.append({
                'title': title.text,
                'soldprice': soldprice,
                'solddate': _node_text(date_tag),
                'bids': _node_text(item.find('span', {'class': 's-item__bids'})),
                'link': link_tag.get('href') if link_tag is not None else None,
            })
        return productslist
    
    def output(productslist, searchterm):
        """Write the scraped rows to ``<searchterm>ebaytrackeroutput.csv``.

        *productslist* is turned into a pandas DataFrame and saved without the
        index column in the current working directory; a confirmation message
        is printed afterwards.
        """
        table = pd.DataFrame(productslist)
        csv_name = searchterm + 'ebaytrackeroutput.csv'
        table.to_csv(csv_name, index=False)
        print('Saved to CSV')
    
    # Run the pipeline: download the results page, extract the products, save them as CSV.
    soup = get_data(searchterm)
    productslist = parse(soup)
    output(productslist, searchterm)