Hello I am trying to get into word scrapers and crawlers however I don't understand why my code is not going to the next page and looping.
import scrapy
from scrapy import*
import scrapy
from scrapy import*
class SpiderSpider(scrapy.Spider):
name = 'spider'
start_urls = ['https://www.thehousedirectory.com/category/interior-designers-architects/london-interior-designers/']
def parse(self, response):
allbuyers = response.xpath('//div[@class="company-details"]')
for buyers in allbuyers:
name = buyers.xpath('.//div/a/h2/text()').extract_first()
email = buyers.xpath('.//p/a[contains(text(),"@")]/text()').extract_first()
yield{
'Name' : name,
'Email' : email,
}
next_url = response.css('#main > div > nav > a.next.page-numbers')
if next_url:
print("test")
url = response.xpath("href").extract()
yield scrapy.Request(url, self.parse)
What you did to get the next page doesn't really make any sense. To be specific, this line I meant url = response.xpath("href").extract()
Here is the modified version of your spider:
class HouseDirectorySpider(scrapy.Spider):
name = 'thehousedirectory'
start_urls = ['https://www.thehousedirectory.com/category/interior-designers-architects/london-interior-designers/']
def parse(self, response):
for buyers in response.xpath('//*[@class="company-details"]'):
yield {
'Name' : buyers.xpath('.//*[@class="heading"]/a/h2/text()').get(),
'Email' : buyers.xpath('.//p/a[starts-with(@href,"mailto:")]/text()').get(),
}
next_url = response.css('.custom-pagination > a.next:contains("Next Page")')
if next_url:
url = next_url.css("::attr(href)").get()
yield scrapy.Request(url,callback=self.parse)