goweb-scrapinggo-colly

go-colly returning empty slice


I am trying to scrape a website, but it seems my slice of products it's empty.

scraper.go:

package scraper

import (
    "fmt"
    "strings"

    "github.com/gocolly/colly"
    "github.com/gocolly/colly/extensions"
)

type Product struct {
    name      string
    fullPrice string
    url       string
}

func Scraper(site string) []Product {

    products := []Product{}
    c := colly.NewCollector()
    replacer := strings.NewReplacer("R$", "", ",", ".")
    c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
        fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
        product := Product{
            name:      e.ChildText("h2"),
            fullPrice: replacer.Replace(fullPrice),
            url:       e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
        }
        fmt.Println(product)
        products = append(products, product)
    })
    fmt.Println(products)

    c.OnRequest(func(r *colly.Request) {
        fmt.Println("Visiting", r.URL)
    })

    c.OnError(func(r *colly.Response, err error) {
        fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
    })

    // Uses a random User-Agent in each request
    extensions.RandomUserAgent(c)

    c.Visit(site)
    return products
}

main.go:

package main

import "github.com/Antonio-Costa00/Go-Price-Monitor/scraper"

func main() {
    scraper.Scraper("https://sp.olx.com.br/?q=iphone%27")
}

product variable has an output, but the slice is empty.

slice output:

[]

I don't know if I am doing something wrong when appending the result to products slice.

Can someone help me to check if I am doing something wrong to return an empty slice?


Solution

  • The Colly library does the scraping asynchronously, so when you print the products it is empty, but it will be filled in another goroutine. By using the OnScraped handler and printing the products there you should see it is filled.

    package scraper
    
    import (
            "fmt"
            "strings"
    
            "github.com/gocolly/colly"
            "github.com/gocolly/colly/extensions"
    )
    
    type Product struct {
            name      string
            fullPrice string
            url       string
    }
    
    func Scraper(site string) []Product {
            products := []Product{}
            c := colly.NewCollector()
            replacer := strings.NewReplacer("R$", "", ",", ".")
            c.OnHTML("div#column-main-content", func(e *colly.HTMLElement) {
                    fullPrice := e.ChildText("span.m7nrfa-0.eJCbzj.sc-ifAKCX.ANnoQ")
                    product := Product{
                            name:      e.ChildText("h2"),
                            fullPrice: replacer.Replace(fullPrice),
                            url:       e.ChildAttr("a.sc-1fcmfeb-2.iezWpY", "href"),
                    }
                    fmt.Println(product)
                    products = append(products, product)
            })
    
            c.OnRequest(func(r *colly.Request) {
                    fmt.Println("Visiting", r.URL)
            })
    
            c.OnError(func(r *colly.Response, err error) {
                    fmt.Println("Request URL:", r.Request.URL, "failed with response:", r.Request, "\nError:", err)
            })
    
            c.OnScraped(func(r *colly.Response) {
                    fmt.Println(products)
            })
    
            // Uses a random User-Agent in each request
            extensions.RandomUserAgent(c)
    
            c.Visit(site)
            return products
    }