Tags: node.js, web-scraping, dom, html-meta, cheerio

cheerio: trouble selecting meta property


I want to extract some metadata from HTML meta tags. The following meta tags are present in the fetched HTML.

<meta property="og:type" content="offer"/>
<meta property="og:title" content='خانه ذرت با کورن داگ لذیذ و خوشمزه  در تهران' />

I have written the below sample code to get content of both og:type and og:title properties from meta tags:

var request = require('request');
var cheerio = require('cheerio');

// Sample deal-page URL handed to getDealInfo. The call works before the
// function's definition below because function declarations are hoisted.
var a='http://someurl/';
getDealInfo(a);


/**
 * Fetches a deal page and logs the content of its Open Graph
 * `og:title` and `og:type` meta tags.
 *
 * Nothing is returned; results and failures are both reported through
 * console.log.
 *
 * @param {string} url - Address of the deal page. It is passed through
 *     encodeURI before the request is made.
 */
function getDealInfo(url){
    var options = {
        url: encodeURI(url),
        headers: {
            'Accept' : '*/*',
            'Cache-Control':'no-cache',
        }
    };
    request(options, function(error, response, html){
        // On a transport failure (DNS, refused connection, ...) `response` is
        // undefined, so it must not be dereferenced unconditionally.
        if (error || !response || response.statusCode !== 200) {
            var status = response ? response.statusCode : 'no response';
            console.log('Error accessing Deal:' + status + '\n'+error);
            return;
        }

        var $ = cheerio.load(html);
        // .attr() yields undefined when the tag is absent from the fetched HTML.
        var title = $('meta[property="og:title"]').attr('content');
        console.log('title: ' + title);
        var type = $('meta[property="og:type"]').attr('content');
        console.log('type: ' + type);
    });
}

I get the correct content for og:type but undefined for og:title, even though both properties are present in the fetched HTML.

Can someone help me figure out why I cannot get the og:title property content?


Solution

  • Note that the HTML response returned by request doesn't contain the og:title meta tag.

    You can verify this by writing the response body to a file and inspecting it:

    // Dump the raw response body to ./index.html so it can be inspected
    // for the og:title meta tag.
    var fs = require('fs')
    request(options, function(error, response, html){
        // Without a body there is nothing to write; report the failure instead.
        if (error) return console.error(error)
        // fs.writeFile is asynchronous and requires a completion callback.
        fs.writeFile('./index.html', html, function(writeError){
            if (writeError) console.error(writeError)
        })
    })
    

    However, you can use the needle package (https://www.npmjs.com/package/needle) instead of request:

    // Fetch the page with needle, extract the og:title content and save it
    // to ./data.json.
    var fs = require('fs')
    var needle = require('needle')
    var results = []
    needle.get(encodeURI(url), function(err, res) {
        if (err) throw err
        var $ = cheerio.load(res.body)
        // .attr() yields undefined when the meta tag is missing from the body.
        var title = $('meta[property="og:title"]').attr('content')
        results.push({
            title: title
        })
        // Pretty-print with 4 spaces so the file matches the sample output below.
        // fs.writeFile is asynchronous and requires a completion callback.
        fs.writeFile('./data.json', JSON.stringify(results, null, 4), function(writeErr) {
            if (writeErr) throw writeErr
        })
    })
    

    Output data.json file with og:title content:

    [
        {
            "title": "خانه ذرت با کورن داگ لذیذ و خوشمزه  در تهران"
        }
    ]