javascript, node.js, cheerio, x-ray

HTML manipulation with Node.js


I want to get HTML from a source (a link, a file, ...) and extract values from it. The HTML format is:

<!doctype html>
<html>
<body>
  <main>
    <section id="serp">
      <div>
        <article>a</article>
        <article>b</article>
        <article>c</article>
        <article>d</article>
      </div>
    </section>
  </main>
</body>
</html>

First of all, I used cheerio. Following the docs, I wrote:

// Load an HTML string into cheerio and select the children of the <div> inside #serp.
// NOTE(review): `myhtml` is not defined in this snippet — presumably it holds the
// HTML document shown above as a string; confirm how it is populated.
const cheerio = require('cheerio');
const $ = cheerio.load(myhtml);
const content = $('#serp div').children();
console.log(content); // null (output reported by the asker)

Following the same procedure, I also tried x-ray and jsdom, but all of them print null.


Solution

  • I've done the following:

    // Bring in cheerio before defining the sample document it will parse.
    const cheerio = require('cheerio');

    // Sample HTML document, held verbatim in a template-literal string.
    const myhtml = `<!doctype html>
    <html>
    <body>
      <main>
        <section id="serp">
          <div>
            <article>a</article>
            <article>b</article>
            <article>c</article>
            <article>d</article>
          </div>
        </section>
      </main>
    </body>
    </html>`;

    // Parse the document, then select the element children of the <div> under #serp.
    const $ = cheerio.load(myhtml);
    const serpDiv = $('#serp div');
    const content = serpDiv.children();

    // First log the selection object itself, then the inner HTML that
    // `.html()` reports for the selection.
    console.log(content);
    const innerHtml = content.html();
    console.log(`html: ${innerHtml}`);
    

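    It outputs the following to the console (note that `.html()` returns the inner HTML of only the first matched element, which is why the last line reads `html: a`):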

    initialize {
      '0': 
       { type: 'tag',
         name: 'article',
         namespace: 'http://www.w3.org/1999/xhtml',
         attribs: {},
         'x-attribsNamespace': {},
         'x-attribsPrefix': {},
         children: [ [Object] ],
         parent: 
          { type: 'tag',
            name: 'div',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: [Object],
            prev: [Object],
            next: [Object] },
         prev: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: null,
            next: [Circular] },
         next: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Circular],
            next: [Object] } },
      '1': 
       { type: 'tag',
         name: 'article',
         namespace: 'http://www.w3.org/1999/xhtml',
         attribs: {},
         'x-attribsNamespace': {},
         'x-attribsPrefix': {},
         children: [ [Object] ],
         parent: 
          { type: 'tag',
            name: 'div',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: [Object],
            prev: [Object],
            next: [Object] },
         prev: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Object],
            next: [Circular] },
         next: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Circular],
            next: [Object] } },
      '2': 
       { type: 'tag',
         name: 'article',
         namespace: 'http://www.w3.org/1999/xhtml',
         attribs: {},
         'x-attribsNamespace': {},
         'x-attribsPrefix': {},
         children: [ [Object] ],
         parent: 
          { type: 'tag',
            name: 'div',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: [Object],
            prev: [Object],
            next: [Object] },
         prev: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Object],
            next: [Circular] },
         next: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Circular],
            next: [Object] } },
      '3': 
       { type: 'tag',
         name: 'article',
         namespace: 'http://www.w3.org/1999/xhtml',
         attribs: {},
         'x-attribsNamespace': {},
         'x-attribsPrefix': {},
         children: [ [Object] ],
         parent: 
          { type: 'tag',
            name: 'div',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: [Object],
            prev: [Object],
            next: [Object] },
         prev: 
          { type: 'text',
            data: '\n        ',
            parent: [Object],
            prev: [Object],
            next: [Circular] },
         next: 
          { type: 'text',
            data: '\n      ',
            parent: [Object],
            prev: [Circular],
            next: null } },
      options: 
       { withDomLvl1: true,
         normalizeWhitespace: false,
         xml: false,
         decodeEntities: true },
      _root: 
       initialize {
         '0': 
          { type: 'root',
            name: 'root',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: null,
            prev: null,
            next: null },
         options: 
          { withDomLvl1: true,
            normalizeWhitespace: false,
            xml: false,
            decodeEntities: true },
         length: 1,
         _root: [Circular] },
      length: 4,
      prevObject: 
       initialize {
         '0': 
          { type: 'tag',
            name: 'div',
            namespace: 'http://www.w3.org/1999/xhtml',
            attribs: {},
            'x-attribsNamespace': {},
            'x-attribsPrefix': {},
            children: [Object],
            parent: [Object],
            prev: [Object],
            next: [Object] },
         options: 
          { withDomLvl1: true,
            normalizeWhitespace: false,
            xml: false,
            decodeEntities: true },
         _root: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] },
         length: 1,
         prevObject: initialize { '0': [Object], options: [Object], length: 1, _root: [Circular] } } }
    html: a
    
    Process finished with exit code 0