For example, I scrape 3 pages with the code below:
var Xray = require('x-ray');
var x = Xray();

// Scrape the blog: for each `.post` element collect its title and link,
// follow the '.nav-previous a' link to paginate, stop after 3 pages and
// write the collected results to results.json.
x('https://blog.ycombinator.com/', '.post', [
  {
    title: 'h1 a',
    link: '.article-title@href',
  },
])
  .paginate('.nav-previous a@href')
  .limit(3)
  .write('results.json');
How can I report the progress?
I tried .then(), but it doesn't seem to work:
// Attempt: chain .then() directly after .write() to report progress.
x('https://blog.ycombinator.com/', '.post', [
  {
    title: 'h1 a',
    link: '.article-title@href',
  },
])
  .paginate('.nav-previous a@href')
  .limit(3)
  .write('results.json')
  .then(
    // something to report the progression
  );
I also tried passing a callback function, which doesn't work either:
// Attempt: invoke the scraper with a callback, then keep chaining on the
// value that call returns.
x('https://blog.ycombinator.com/', '.post', [
  {
    title: 'h1 a',
    link: '.article-title@href',
  },
])(() => {
  // something to report the progress
})
  .paginate('.nav-previous a@href')
  .limit(3)
  .write('results.json');
.then() can work, but not after .write().
.then() needs (I think!) a promise to be called on, and after .write() there is nothing left to chain on.
You can try removing the .write() call and using .then() to console.log the results, like this:
var Xray = require('x-ray');
var x = Xray();

// Scrape up to 3 pages of posts and print the collected results instead of
// writing them to a file.
x('https://blog.ycombinator.com/', '.post', [
  {
    title: 'h1 a',
    link: '.article-title@href',
  },
])
  .paginate('.nav-previous a@href')
  .limit(3)
  // .write('results.json') is left out on purpose so that .then() can be
  // chained on the scraper itself.
  .then(result => {
    // Print the scraped titles and links.
    console.log(result);
  })
  .catch(err => {
    // Report a failed scrape instead of leaving the rejection unhandled.
    console.error(err);
  });
That will print the title and link of each post on the pages you scraped.
You could also use .then() and, inside it, write the results to a file using something like fs, for example:
var Xray = require('x-ray');
const fs = require('fs')
var x = Xray();

// Scrape up to 3 pages of posts, log the collected results, and save them
// to results.json as pretty-printed JSON.
x('https://blog.ycombinator.com/', '.post', [
  {
    title: 'h1 a',
    link: '.article-title@href',
  },
])
  .paginate('.nav-previous a@href')
  .limit(3)
  .then(results => {
    console.log(results);
    // Serialize with a 2-space indent so the file is human-readable.
    const res = JSON.stringify(results, null, 2);
    fs.writeFile('results.json', res, (err) => {
      if (err) throw err;
      console.log('result saved!');
    });
  })
  .catch(err => {
    // Report a failed scrape instead of leaving the rejection unhandled.
    console.error(err);
  });
Here JSON.stringify(results, null, 2) takes an object (results is an array of objects) and turns it into a JSON string (the third argument, 2, is the indentation, which just makes the output pretty).
Then, using fs.writeFile (fs is a native Node module), you write that JSON string to results.json.
You could even handle it object by object using forEach(),
like this:
results.forEach(result => {
// Log the individual result and push it onto an (initially empty) array;
// once the loop is done, write that array to the file.
})