I am trying to webscrape the name, price, and description of products listed on an online shop. The website link is https://eshop.nomin.mn/n-foods.html
When I look through the HTML code of the page, I can see the relevant div class containers, but when I reference them in my code as shown below, I get no values when I run my spider. I think one reason could be that the website is JavaScript-based and dynamic, which would require me to use Splash. However, I don't think this is the case for my issue.
def parse(self, response, **kwargs):
    """Yield one item per product card found in the response.

    Args:
        response: The downloaded page; only its ``xpath`` method is used.
        **kwargs: Extra callback arguments (unused).

    Yields:
        dict: ``{'price': [...]}`` holding the text nodes extracted from the
        ``<span>`` inside each card's name link.
    """
    cards = response.xpath('//div[@class="item-itemmainroot-1lZ"]')
    # parse details
    for card in cards:
        # ``span`` is an element name test; the previous ``span()`` form is
        # not a valid XPath step and makes the expression unparsable.
        price = card.xpath(
            ".//a[contains(@class, 'item-nameLenght-K5Z item-name-3TH')]/span/text()"
        ).extract()
        # Emit the value so the excerpt matches the full spider instead of
        # computing it and discarding it.
        yield {'price': price}
Full Code:
import scrapy
import re
class TempSpider(scrapy.Spider):
    """Spider that looks for product cards on the N-Foods category page.

    NOTE(review): if the product grid is rendered client-side by JavaScript,
    these selectors will match nothing in the raw HTML that Scrapy downloads;
    confirm by inspecting ``response.text``.
    """

    name = 'temp_spider'
    # allowed_domains expects bare domain names, not URLs with a scheme/path.
    allowed_domains = ['eshop.nomin.mn']
    start_urls = ['https://eshop.nomin.mn/n-foods.html']

    def parse(self, response, **kwargs):
        """Yield one item per product card found in the response.

        Args:
            response: The downloaded category page.
            **kwargs: Extra callback arguments (unused).

        Yields:
            dict: ``{'price': [...]}`` holding the text nodes extracted from
            the ``<span>`` inside each card's name link.
        """
        cards = response.xpath('//div[@class="item-itemmainroot-1lZ"]')
        # parse details
        for card in cards:
            # ``span`` is an element name test; the previous ``span()`` form
            # is not a valid XPath step and makes the expression unparsable.
            price = card.xpath(
                ".//a[contains(@class, 'item-nameLenght-K5Z item-name-3TH')]/span/text()"
            ).extract()
            yield {'price': price}
[1]: https://i.sstatic.net/iokmo.png
All and any help is greatly appreciated. I can't seem to figure out what I am doing wrong.
Use the website's data API instead of the website URL that you visit in your browser. It will return a JSON object that has all the information you are looking for.
import scrapy
import re
class TempSpider(scrapy.Spider):
    """Spider that reads product names and prices from the shop's GraphQL API.

    The start URL requests page 1 of category 24175 with a page size of 50,
    so a single run yields at most 50 products.
    """

    name = 'temp_spider'
    # allowed_domains expects bare domain names, not URLs with a scheme/path.
    allowed_domains = ['eshop.nomin.mn']
    start_urls = ['https://eshop.nomin.mn/graphql?query=query+category($pageSize:Int!$currentPage:Int!$filters:ProductAttributeFilterInput!$sort:ProductAttributeSortInput){products(pageSize:$pageSize+currentPage:$currentPage+filter:$filters+sort:$sort){items{id+name+sku+brand+salable_qty+brand_name+c21_available+c21_business_type+c21_reference+c21_street+c21_area+c21_bed_room+mp_daily_deal{created_at+date_from+date_to+deal_id+deal_price+remaining_time+deal_qty+discount_label+is_featured+product_id+product_name+product_sku+sale_qty+status+store_ids+updated_at+__typename}new_to_date+short_description{html+__typename}productAttributes{name+value+__typename}price{regularPrice{amount{currency+value+__typename}__typename}__typename}special_price+special_to_date+thumbnail{file_small+url+__typename}url_key+url_suffix+mp_label_data{enabled+name+priority+label_template+label_image+to_date+__typename}...on+ConfigurableProduct{variants{product{sku+special_price+price{regularPrice{amount{currency+value+__typename}__typename}__typename}__typename}__typename}__typename}__typename}page_info{total_pages+__typename}total_count+__typename}}&operationName=category&variables={"currentPage":1,"id":24175,"filters":{"category_id":{"in":"24175"}},"pageSize":50,"sort":{"position":"DESC"}}']

    def parse(self, response, **kwargs):
        """Yield the name and regular price of every product in the response.

        Args:
            response: The API response; its body is decoded as JSON.
            **kwargs: Extra callback arguments (unused).

        Yields:
            dict: ``{"name": ..., "price": ...}`` for each entry under
            ``data.products.items``.
        """
        data = response.json()
        # Log the top-level keys through the spider logger rather than
        # printing them, so the diagnostic follows the configured log level.
        self.logger.debug("Top-level response keys: %s", list(data))
        for item in data['data']["products"]["items"]:
            yield {
                "name": item["name"],
                "price": item["price"]["regularPrice"]["amount"]["value"],
            }
Partial OUTPUT
{'name': 'Хиам Аялал кг', 'price': 19559}
{'name': 'Чихэр Княжеские 1кг', 'price': 24859}
{'name': 'Жимсний чанамал Mr', 'price': 11999}
{'name': 'Vit C ', 'price': 28799}
{'name': 'Жүүс Моя семья', 'price': 3629}
{'name': 'Муурны ялгадас шингээх', 'price': 31999}
{'name': 'Компот Vidan 920гр', 'price': 8879}
{'name': 'Мөс 0.5кг 024218', 'price': 2029}
{'name': 'Өргөст хэмх Hainich', 'price': 7799}
{'name': 'Соус чилитэй 215гр', 'price': 9499}
{'name': 'Цай Ottogi улаан', 'price': 14299}
{'name': 'Цай шингэн Pfanner', 'price': 9379}
{'name': '02381088', 'price': 3179}
{'name': 'Өглөөний хоол G&G', 'price': 8239}
{'name': '02S003167', 'price': 7699}
{'name': '02S003133', 'price': 8299}
{'name': 'Кофе Жокей империал', 'price': 14279}
{'name': 'Жүүс Pfanner orange', 'price': 13129}
{'name': 'Цуу улаан дарсны', 'price': 6939}
{'name': 'Оливын тос Borges', 'price': 14749}
{'name': 'Оливын тос classic', 'price': 33629}
{'name': 'Оливын тос Borges', 'price': 18629}
{'name': 'Гоймон Borges Fusilli', 'price': 5939}
{'name': 'Цай шингэн чавганы', 'price': 2469}
{'name': 'Гоймон Нүүдэл 500гр', 'price': 3759}
{'name': 'Муурны хоол 85гр', 'price': 1889}
{'name': 'Бэлэн Карри зөөлөн', 'price': 7499}
{'name': 'Цай Dr.Baatar 2гр*16ш', 'price': 11999}
{'name': 'Нухаш Urbanek ', 'price': 6979}
{'name': 'Вандуй лууван холимог', 'price': 5899}
{'name': 'Өргөст хэмх Bagro', 'price': 13499}
{'name': 'Бэлэн хоол Samyang', 'price': 6189}
{'name': 'Жүүс Naturalis apple', 'price': 1589}
{'name': 'Жүүс Naturalis Apple-grape', 'price': 5999}
{'name': 'Жүүс Naturalis Apple-sour', 'price': 5999}
{'name': 'Жүүс Vita Pomegranate', 'price': 3659}
{'name': 'Шоколад Luna 33гр', 'price': 1499}
{'name': 'Жүүс Фруктовый Сад', 'price': 5999}
{'name': 'Жүүс Фруктовый Сад', 'price': 5299}
{'name': 'Жүүс Фруктовый Сад', 'price': 5299}
{'name': 'Жүүс Фруктовый Сад', 'price': 5299}
{'name': 'Жүүс Фруктовый Сад', 'price': 5299}
You can find the URL for the API in the Network tab of your browser's devtools...