I want to get the movie ratings from a page such as the one below and print them line by line. I've already extracted the names and release years with BeautifulSoup 4 (BS4), but I don't know how to get the ratings:
import requests
from bs4 import BeautifulSoup
import urllib.request
# Download the IMDb list page and print "<title> <year>" for every entry.
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen('http://imdb.com/list/ls097228983/') as response:
    content = response.read()
soup = BeautifulSoup(content, 'lxml')

# Every list entry is searched through its <h3 class="lister-item-header">,
# which holds the title link and the release-year span.
for header in soup.find_all('h3', class_='lister-item-header'):
    title_link = header.find('a')
    if title_link is None:
        # Without a title link there is nothing meaningful to print.
        continue
    name = title_link.get_text()

    # Match on the single distinguishing class so the lookup does not depend
    # on the exact order of the span's other CSS classes.
    year_tag = header.find('span', class_='lister-item-year')
    # Read the text of the tag directly rather than stringifying the tag and
    # deleting its markup; fall back to '' so a missing span never prints
    # the literal string "None".
    year = year_tag.get_text() if year_tag is not None else ''

    print(name + ' ' + year)
Desired output, for example: Solaris (1972) 8.1
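To reach the rating, you need to change the class you search for from 'class':'lister-item-header'
to 'class':'lister-item-content'.
That is the class of the parent element, which contains the rating as well as the header: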
import requests
from bs4 import BeautifulSoup
import urllib.request
# Download the IMDb list page and print "<title> <year> <rating>" for every
# entry. The context manager closes the HTTP response after the body is read.
with urllib.request.urlopen('http://imdb.com/list/ls097228983/') as response:
    content = response.read()
soup = BeautifulSoup(content, 'lxml')

# Iterate over the parent <div class="lister-item-content"> of each entry
# (instead of only its header) so the rating span is reachable too.
for item in soup.find_all('div', class_='lister-item-content'):
    title_link = item.find('a')
    if title_link is None:
        # Without a title link there is nothing meaningful to print.
        continue
    name = title_link.get_text()

    # Match on the single distinguishing class so the lookup does not depend
    # on the exact order of the span's other CSS classes.
    year_tag = item.find('span', class_='lister-item-year')
    # Read the tag's text directly rather than stripping markup from str(tag).
    year = year_tag.get_text() if year_tag is not None else ''

    # find() returns None when the span is absent, so guard before reading
    # the text instead of letting `.text` raise AttributeError.
    rating_tag = item.find('span', class_='ipl-rating-star__rating')
    rating = rating_tag.get_text() if rating_tag is not None else 'N/A'

    # An f-string interpolates on its own; chaining .format() onto it is
    # redundant and raises if a title happens to contain '{' or '}'.
    # Plain concatenation (name + ' ' + year + ' ' + rating) gives the same
    # line, so print only once per movie.
    print(f'{name} {year} {rating}')