json, go, web-scraping, go-map, go-colly

Add colly package output text to a map in Go


I am writing a web scraper with the colly package. It collects the contest name and contest start time from a website and should produce a JSON file.

So far I have written this:


    // Four levels of string-keyed maps: Contests[site][category][index][field] = value.
    Contests := make(map[string]map[string]map[string]map[string]string)
    
    // Each inner map has to be created with make before anything is stored in it.
    Contests["AtCoder"] = make(map[string]map[string]map[string]string)
    // The value kept under "FutureContests" is itself a map of maps (index -> field -> value).
    Contests["AtCoder"]["FutureContests"] = make(map[string]map[string]string)

    // Hand the whole structure to the scraper.
    AtcoderFunc(Contests)


.................code..........

// AtcoderFunc visits https://atcoder.jp/contests and, for rows of the
// "#contest-table-upcoming" table, prints each contest name and its start time
// converted to the Asia/Calcutta time zone.
//
// The Contests argument is accepted but is neither read nor written in this
// version; the results are only printed.
func AtcoderFunc(Contests map[string]map[string]map[string]map[string]string) {
    // Only requests to the AtCoder domains are allowed.
    collector := colly.NewCollector(
        colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
    )

    // loc, _ := time.LoadLocation("Asia/Calcutta")
    // format := "2006-01-02 15:04:05"
    // var i int
    // Layout for time.Parse: date, time of day and a numeric UTC offset.
    format := "2006-01-02 15:04:05-0700"
    // NOTE(review): the LoadLocation error is discarded; if loading fails loc is
    // nil and the later parsed_time.In(loc) call would panic.
    loc, _ := time.LoadLocation("Asia/Calcutta")


    // Register one pair of handlers per row index 1..9.
    for i := 1; i < 10; i++ {
        // NOTE(review): the time selector targets tr:nth-child(i+1) while the name
        // selector targets tr:nth-child(i) — confirm this offset is intended.
        ContestSelTime := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(1)  a", i+1)
        ContestSelName := fmt.Sprintf("#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)  td:nth-child(2)", i)

        // for contest name: text of the <a> element(s) inside the row's second cell
        collector.OnHTML(ContestSelName, func(element *colly.HTMLElement) {
            ContestName := element.ChildText("a")
            // Debug output: prints the Go type of ContestName.
            fmt.Printf("%T \n", ContestName)
            fmt.Println(ContestName) // instead of printing i want to add it to the Contests["AtCoder"]["FutureContests"] map and print like json 
            

        })

        // for contestTime: text of the <time> element inside the first cell's link
        collector.OnHTML(ContestSelTime, func(element *colly.HTMLElement) {
            ContestStartTime := element.ChildText("time")
            // NOTE(review): the parse error is discarded; on failure parsed_time is
            // the zero time.Time and that value is what gets printed.
            parsed_time, _ := time.Parse(format, ContestStartTime)
            IST_time := parsed_time.In(loc)
            fmt.Println("Time in IST", IST_time) // instead of printing i want to add it to the Contests["AtCoder"]["FutureContests"] map.
        })

    }

    // Log every request URL before it is sent.
    collector.OnRequest(func(request *colly.Request) {
        fmt.Println("Visiting", request.URL.String())
    })

    // Start the scrape. NOTE(review): the error returned by Visit is not checked.
    collector.Visit("https://atcoder.jp/contests")

}


Any ideas? I tried adding the value to the map like this:

            // Does not compile: this slot holds a map[string]map[string]string
            // (index -> fields), but the literal is a plain map[string]string.
            Contests["AtCoder"]["FutureContests"] = map[string]string{
                "Name": string(ContestName),
            }

I want to produce JSON like this:

{
  "AtCoder": {
    "FutureContests": {
      "1": {
        "Name": "Contest name",
        "Start": "time here"
      },
      "2": {
        "Name": "Contest name",
        "Start": "time here"
      }
    }
  }
}

But it gives this compile error: `cannot use (map[string]string literal) (value of type map[string]string) as map[string]map[string]string value in assignment`

How can I fix this?


Solution

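  • The error comes from the map assignment: `Contests["AtCoder"]["FutureContests"]` has type `map[string]map[string]string`, so a `map[string]string` can only be stored one level deeper, under a key such as `"1"`. Such a deeply nested structure is hard to manage, but I found a way to deal with it. Here is the code: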

    package main
    
    import (
        "encoding/json"
        "fmt"
        "strconv"
        "time"
    
        "github.com/gocolly/colly/v2"
    )
    
    // contest is an empty placeholder type; nothing else in this example refers to it.
    type contest struct{}
    
    // AtcoderFunc scrapes the upcoming-contests table on https://atcoder.jp/contests
    // and stores its first two rows in contests["FutureContests"], keyed by "1" and
    // "2". Each entry holds "Name" (the contest title) and "Start" (the start time
    // converted to Indian Standard Time), which is the JSON layout the question
    // asks for.
    //
    // Any existing contests["FutureContests"] value is replaced. Failures (time
    // zone lookup, unparsable timestamps, the HTTP visit) are reported on standard
    // output instead of being silently dropped; a field that could not be scraped
    // is left unset.
    func AtcoderFunc(contests map[string]map[string]map[string]string) {
        const (
            category   = "FutureContests"
            timeLayout = "2006-01-02 15:04:05-0700"
            // rowSelector matches the n-th row of the upcoming-contests table body.
            rowSelector = "#contest-table-upcoming  div  div  table  tbody  tr:nth-child(%d)"
        )
    
        collector := colly.NewCollector(
            colly.AllowedDomains("atcoder.jp", "www.atcoder.jp"),
        )
    
        loc, err := time.LoadLocation("Asia/Calcutta")
        if err != nil {
            // Time.In panics on a nil location. IST is a fixed +05:30 offset, so a
            // fixed zone is an adequate substitute when no tz database is available.
            fmt.Println("loading Asia/Calcutta, using fixed +05:30 offset instead:", err)
            loc = time.FixedZone("IST", 5*60*60+30*60)
        }
    
        contests[category] = make(map[string]map[string]string)
    
        for i := 1; i < 3; i++ {
            // entry is declared per iteration, so each pair of handlers below
            // writes to its own map.
            entry := make(map[string]string)
            contests[category][strconv.Itoa(i)] = entry
    
            // Both selectors address the same row so that a contest's name and
            // start time end up in the same entry.
            row := fmt.Sprintf(rowSelector, i)
    
            // Contest name: link text in the row's second cell.
            collector.OnHTML(row+"  td:nth-child(2)", func(e *colly.HTMLElement) {
                entry["Name"] = e.ChildText("a")
            })
    
            // Contest start: <time> text inside the link in the row's first cell.
            collector.OnHTML(row+"  td:nth-child(1)  a", func(e *colly.HTMLElement) {
                raw := e.ChildText("time")
                start, err := time.Parse(timeLayout, raw)
                if err != nil {
                    // Skip the field rather than store the zero time.
                    fmt.Printf("parsing start time %q: %v\n", raw, err)
                    return
                }
                entry["Start"] = start.In(loc).String()
            })
        }
    
        collector.OnRequest(func(r *colly.Request) {
            fmt.Println("Visiting", r.URL.String())
        })
    
        if err := collector.Visit("https://atcoder.jp/contests"); err != nil {
            fmt.Println("visiting https://atcoder.jp/contests:", err)
        }
    }
    
    // main builds the site-keyed result map, lets AtcoderFunc fill the "AtCoder"
    // entry, and prints the whole structure as indented JSON.
    func main() {
        contests := make(map[string]map[string]map[string]map[string]string)
        // The per-site map must exist before AtcoderFunc writes into it.
        contests["AtCoder"] = make(map[string]map[string]map[string]string)
    
        AtcoderFunc(contests["AtCoder"])
    
        data, err := json.MarshalIndent(contests, "", "  ")
        if err != nil {
            fmt.Println("encoding contests as JSON:", err)
            return
        }
        fmt.Println(string(data))
    }
    

    I kept more or less your structure. Besides fixing the issue, I refactored your example a little by changing some names and getting rid of unused statements. Lastly, I used the MarshalIndent function to pretty-print the JSON string on the terminal.
    Let me know if it works for you too!