python, web-scraping, scrapy, scrapy-shell

Scrapy doesn't go to the next URL


The next_page variable gives the correct link when used in the Scrapy shell, and even when printed to the console, but Scrapy still keeps scraping the same (first) page.

Code below:

import time

import scrapy
from selenium import webdriver


class QuotesSpider(scrapy.Spider):
    name = "Bider"

    def start_requests(self):
        urls = [
            "https://www.flipkart.com/clothing-and-accessories/bottomwear/pr?sid=clo,vua&p[]=facets.ideal_for%255B%255D%3DMen&p[]=facets.ideal_for%255B%255D%3Dmen&otracker=categorytree&fm=neo%2Fmerchandising&iid=M_1064313a-7a8d-48f3-8199-daaf60d62ef6_2_372UD5BXDFYS_MC.8HARX8UX7IX5&otracker=hp_rich_navigation_2_2.navigationCard.RICH_NAVIGATION_Fashion~Men%2527s%2BBottom%2BWear_8HARX8UX7IX5&otracker1=hp_rich_navigation_PINNED_neo%2Fmerchandising_NA_NAV_EXPANDABLE_navigationCard_cc_2_L1_view-all&cid=8HARX8UX7IX5"
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        browser = webdriver.Chrome()
        browser.get(response.request.url)

        # take the last pagination link (the "Next" button); retry once if no links were found yet
        next_page = response.css("a._1LKTO3::attr(href)").getall()
        try:
            next_page = next_page[-1]
        except:
            time.sleep(1)
            next_page = response.css("a._1LKTO3::attr(href)").getall()
            next_page = next_page[-1]

        print("\n\n\n NEXT PAGE\n\n\n")
        print("\n" + next_page + "\n")
        print(response.urljoin(next_page))

        if next_page is not None:
            next_page = response.urljoin(next_page)
            yield scrapy.Request(next_page, callback=self.parse)

Solution

  • Your code works for me, so I'm not sure why it doesn't work for you. Anyway, this pagination also works and is cleaner.

    import scrapy
    from selenium import webdriver
    
    
    class QuotesSpider(scrapy.Spider):
        name = "Bider"
    
        def start_requests(self):
            urls = [
                "https://www.flipkart.com/clothing-and-accessories/bottomwear/pr?sid=clo,vua&p[]=facets.ideal_for%255B%255D%3DMen&p[]=facets.ideal_for%255B%255D%3Dmen&otracker=categorytree&fm=neo%2Fmerchandising&iid=M_1064313a-7a8d-48f3-8199-daaf60d62ef6_2_372UD5BXDFYS_MC.8HARX8UX7IX5&otracker=hp_rich_navigation_2_2.navigationCard.RICH_NAVIGATION_Fashion~Men%2527s%2BBottom%2BWear_8HARX8UX7IX5&otracker1=hp_rich_navigation_PINNED_neo%2Fmerchandising_NA_NAV_EXPANDABLE_navigationCard_cc_2_L1_view-all&cid=8HARX8UX7IX5"
            ]
            for url in urls:
                yield scrapy.Request(url=url, callback=self.parse)
    
        def parse(self, response):
            # open the page in a Chrome window as well (the Scrapy response below is what gets parsed)
            browser = webdriver.Chrome()
            browser.get(response.request.url)
    
            # pick the "Next" pagination link by its visible label
            next_page = response.xpath('//a[span[text()="Next"]]/@href').get()
            
            if next_page:
                print("\n\n\n NEXT PAGE\n\n\n")
                print("\n"+next_page+"\n")
                next_page = response.urljoin(next_page)
                print(next_page)
                
                yield scrapy.Request(next_page, callback=self.parse)
    

    [screenshot: pagination proof]
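
  • Side note: the same pagination can also be expressed with response.follow, which joins the relative href against the current URL for you, so the explicit urljoin step isn't needed. Below is a minimal sketch of that variant, reusing the spider name, start URL, and "Next"-link XPath from above; the Selenium call is left out here since only the Scrapy response is parsed.

    import scrapy


    class QuotesSpider(scrapy.Spider):
        name = "Bider"
        start_urls = [
            "https://www.flipkart.com/clothing-and-accessories/bottomwear/pr?sid=clo,vua&p[]=facets.ideal_for%255B%255D%3DMen&p[]=facets.ideal_for%255B%255D%3Dmen&otracker=categorytree&fm=neo%2Fmerchandising&iid=M_1064313a-7a8d-48f3-8199-daaf60d62ef6_2_372UD5BXDFYS_MC.8HARX8UX7IX5&otracker=hp_rich_navigation_2_2.navigationCard.RICH_NAVIGATION_Fashion~Men%2527s%2BBottom%2BWear_8HARX8UX7IX5&otracker1=hp_rich_navigation_PINNED_neo%2Fmerchandising_NA_NAV_EXPANDABLE_navigationCard_cc_2_L1_view-all&cid=8HARX8UX7IX5"
        ]

        def parse(self, response):
            # grab the href of the "Next" pagination link (same XPath as in the answer)
            next_page = response.xpath('//a[span[text()="Next"]]/@href').get()
            if next_page:
                # response.follow resolves the relative URL before scheduling the request
                yield response.follow(next_page, callback=self.parse)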