Tags: javascript, web-scraping, cheerio, unirest

How to scrape Google Images with unirest and cheerio?


I am trying to scrape Google Images using unirest and cheerio, but I got stuck: the HTML is fetched successfully, yet my selectors do not extract anything from it. This is my current code:

const unirest = require("unirest");
const cheerio = require("cheerio");


/**
 * Requests a Google Images result chunk for the query "india" and logs the
 * raw response body followed by the titles and image sources extracted from it.
 *
 * NOTE: as reported in the question, both extracted arrays come back empty
 * with the `.vbC6V` parent selector used below.
 *
 * @returns {Promise<void>} Resolves once all three values have been logged.
 */
const getData = async () => {
  const searchUrl =
    "https://www.google.com/search?q=india&oq=india&tbm=isch&asearch=ichunk&async=_id:rg_s,_pms:s,_fmt:pc&sourceid=chrome&ie=UTF-8";
  const requestHeaders = {
    "User-Agent":
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
  };

  // "proxy" is presumably a placeholder for a real proxy URL — confirm before running.
  const response = await unirest
    .get(searchUrl)
    .headers(requestHeaders)
    .proxy("proxy");

  const $ = cheerio.load(response.body);
  // The HTML document is returned successfully at this point.
  console.log(response.body);

  // One entry per element matching the parent selector, in document order.
  const cards = $(".vbC6V").toArray();
  const title = cards.map((el) => $(el).find(".iKjWAf .mVDMnf").text());
  const link = cards.map((el) => $(el).find(".rg_l .rg_ic").attr("src"));

  console.log(title); // observed: empty array
  console.log(link); // observed: empty array
};

// Run the scraper. A failed request or parse rejects the promise, so report
// the error explicitly instead of leaving the rejection unhandled.
getData().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

Solution

  • It turns out that the parent class to select is `rg_bx`, not `vbC6V`. The updated loop is:

    // Walk every `.rg_bx` result container and record its title text and
    // image `src` at the matching index.
    $(".rg_bx").each((index, card) => {
      const $card = $(card);
      title[index] = $card.find(".iKjWAf .mVDMnf").text();
      link[index] = $card.find(".rg_l .rg_ic").attr("src");
    });