I am trying to scrape the reviews posted in TripAdvisor user profiles in order to analyse them for my final-year project, so I use the following Python code with the Beautiful Soup library. My issue is how to click on the "Show more" button so that I can scrape all the reviews in the profile. The image at https://i.sstatic.net/OWUPt.png shows the button and its source code that I need to work on.
This is the code I use to scrape data from a TripAdvisor user profile:
import requests
import csv
import re
from bs4 import BeautifulSoup
# Upper bound on the number of reviews scraped per profile (see the
# endcount computation in getallReviewsBymainUrl).
maxcount = 50
# Name of the CSV file currently being filled; assigned by
# getallReviewsBymainUrl and read by writecsv.
filename = ""
def writecsv(c1, c2, c3, c4, c5):
    """Append one five-column row to the CSV file named by the global ``filename``.

    Every value is converted with ``str`` before it is written, so ``None``
    becomes the text ``"None"``.

    The file is opened as UTF-8 so that review text containing non-ASCII
    characters is written the same way on every platform instead of
    depending on the locale's default encoding.
    """
    # newline='' lets the csv module manage line endings itself, which avoids
    # blank rows on Windows.
    with open(filename, mode='a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([str(c1), str(c2), str(c3), str(c4), str(c5)])
def onereview(review):
    """Extract the fields of one review element and append them as a CSV row.

    ``review`` is the BeautifulSoup element wrapping a single review. For each
    field the first descendant with the given class is looked up; its text is
    used, or an empty string when no such descendant exists. The row is
    written through ``writecsv`` in the order: name, title, details, date,
    reviewed place.

    This is best-effort: any ordinary error (for example ``review`` being
    ``None``) is reported by printing ``'error'`` and the review is skipped.
    """
    def text_of(css_class):
        # Look the node up once instead of once for the test and once for
        # the value.
        node = review.find(class_=css_class)
        return node.get_text() if node is not None else ""

    try:
        # NOTE(review): these class names look auto-generated by the site and
        # may change — confirm against the current page markup.
        name = text_of("_2fxQ4TOx")
        reviewTitle = text_of("_3IEJ3tAK _2K4zZcBv")
        reviewDetails = text_of("_133ThCYf")
        reviewDate = text_of("_3Coh9OJA")
        reviewFor = text_of("_2ys8zX0p ui_link")
        writecsv(name, reviewTitle, reviewDetails, reviewDate, reviewFor)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still
        # propagate.
        print('error')
def allreviews(URL, endcount):
    """Download ``URL`` and write up to ``endcount`` reviews to the CSV file.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. The
    first ``div`` inside ``<div id="content">`` is taken as the container,
    and its first child ``div`` as the starting point. Each following
    sibling ``div`` is passed to ``onereview``.

    Only reviews present in the downloaded HTML are visible here: ``requests``
    does not execute JavaScript, so content loaded by a "Show more" button is
    never seen.

    Prints ``'review not found'`` and stops early when fewer than
    ``endcount`` sibling elements exist. An ``AttributeError`` propagates to
    the caller when the expected ``body`` / ``#content`` structure is missing.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    page = requests.get(URL, timeout=30)
    html = BeautifulSoup(page.content, 'html.parser')
    body = html.find('body')
    contents = body.find("div", {"id": "content"}).div
    current = contents.div
    for _ in range(endcount):
        # Resolve the next sibling once and test it explicitly rather than
        # relying on an AttributeError to end the loop.
        following = current.find_next_sibling('div') if current is not None else None
        if following is None:
            print('review not found')
            break
        onereview(following)
        current = following
def getallReviewsBymainUrl(URL):
    """Scrape one profile page into ``<username>.csv``.

    Steps:
      1. Download ``URL`` and read the review count (commas stripped) and the
         user name (spaces replaced by underscores) from the page.
      2. Set the global ``filename`` to ``<username>.csv`` and create the file
         with a header row, overwriting any existing file.
      3. Call ``allreviews`` for at most ``min(maxcount, count)`` reviews.

    Raises ``AttributeError`` when the expected elements are missing from the
    page, and ``ValueError`` when the count text is not an integer.
    """
    global filename
    # A timeout keeps a stalled connection from hanging the script forever.
    page = requests.get(URL, timeout=30)
    html = BeautifulSoup(page.content, 'html.parser')
    body = html.find('body')
    # NOTE(review): these class names look auto-generated by the site and may
    # change — confirm against the current page markup.
    count = body.find(class_="iX3IT_XP").get_text().replace(',', '')
    username = body.find(class_="_2wpJPTNc _345JQp5A").get_text().replace(' ', '_')
    filename = username + ".csv"
    print('start to fill ' + filename)
    # newline='' is what the csv module expects (it avoids blank rows on
    # Windows). The file is closed here, before writecsv starts appending.
    with open(filename, mode='w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow(
            ['user name', 'reviewTitle', 'reviewDetails', 'reviewDate', 'reviewFor']
        )
    # Never ask for more reviews than the profile reports, nor more than the
    # configured cap.
    endcount = min(int(maxcount), int(count))
    allreviews(URL, endcount)
    print('save reviews in page = ', str(endcount), ' user = ', filename)
    print()
# Profile pages to scrape; each one produces its own <username>.csv file.
# NOTE(review): the last group of the first fid is shorter than in the second
# URL — confirm it was not truncated when pasted.
URLs = ['https://www.tripadvisor.com/Profile/KatieTuesday?fid=ba3cc2e7-5d8d-404e-88bd-f7f30',
        'https://www.tripadvisor.com/Profile/elmila?fid=6497e4e4-1314-487b-a883-a6b519bc7efb'
        ]
for url in URLs:
    try:
        getallReviewsBymainUrl(url)
    except Exception:
        # Keep going with the next profile on any ordinary failure, but use
        # Exception rather than a bare except so Ctrl-C can still stop the run.
        print('There is a mistake, check again ' + url)
        print()
print('program is end, Thank you.')
I tried the following lines to click the button, but they don't work. Also, I cannot find the URL of the "Show more" button in the source code. This is the URL of the user profile on TripAdvisor that I used to scrape:
# NOTE: this attempt cannot work. BeautifulSoup only parses the HTML text that
# was downloaded; the object returned by find() has no click() method, and
# there is no running browser to execute the page's JavaScript.
# (`body` is the local variable from allreviews, so this was tried inside it.)
button = body.find("button", {"class": "_1JOGv2rJ _2oWqCEVy _3yBiBka1 _3fiJJkxX"})
button.click()
I would be grateful if anyone can help me with this issue.
You can use Selenium to click on a button like this:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Opens a Chrome browser. You should install a chromedriver for this and save
# it in the path where you have this program. You can download it from here:
# https://chromedriver.chromium.org/downloads
driver = webdriver.Chrome()
# Opens the URL.
driver.get('https://www.tripadvisor.com/Profile/wwkalex-fernando?tab=reviews')
# Finds the button with the given class name and clicks on it. Replace the
# placeholder with the button's real class name (the class name shown in the
# screenshot was very long, so it is not typed here to avoid mistakes).
# find_element(By.CLASS_NAME, ...) is used because the older
# find_element_by_class_name helper was removed in Selenium 4.
# By.CLASS_NAME takes a single class name; for a button with several classes,
# use By.CSS_SELECTOR (e.g. "button.cls1.cls2") or By.XPATH instead.
driver.find_element(By.CLASS_NAME, "button class name").click()
Hope that this helps!