When making a GET request to the Twitter API, it returns all the tweets in the array tweets.statuses. We can find the number of tweets returned using tweets.statuses.length. A single request returns a maximum of 100 tweets, even if thousands of tweets are available.
Similarly, the metadata is returned in the object tweets.search_metadata, which looks like the object shown below. Here count is the count that was passed to the Twitter API, not the number of tweets returned.
{ completed_in: 0.13,
max_id: 1049894626625286100,
max_id_str: '1049894626625286144',
next_results: '? max_id=1049894470475485183&q=apple&count=100&include_entities=1',
query: 'apple',
refresh_url: '?since_id=1049894626625286144&q=apple&include_entities=1',
count: 100,
since_id: 0,
since_id_str: '0' }
In the metadata above, we can check whether next_results exists. If it does, more results are available, so we can call the Twitter API again and again, passing it a new max_id every time, until next_results is null, i.e. it is absent from the metadata returned with the last batch of results. Every time next_results exists, it contains a new max_id that can be used to fetch the next 100 tweets.
To solve this I am using a do...while loop, in which the code block (the Twitter API request) runs at least once before the condition (whether next_results exists) is checked.
The problem is that my do...while loop only runs once, even though next_results is still available and not null. What am I doing wrong?
My node.js code looks like this:
// Question code: tries to page through search results with a synchronous
// do...while loop around a promise-based request.
// dotenv is expected to populate process.env before the credentials are read.
require('dotenv').load();
var Twitter = require('twitter');
// API client configured from the TWITTER_* environment variables.
var client = new Twitter({
consumer_key: process.env.TWITTER_CONSUMER_KEY,
consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
bearer_token: process.env.TWITTER_BEARER_TOKEN
});
// Despite its name, `url` is the search term sent as the `q` parameter.
var url = 'apple';
// Running total of tweets seen across all responses.
var totalCount = 0;
// All four start out undefined; they are only assigned inside the .then() callback.
var resultsExist, maxid, isEqualsToLocation, andLocation;
do {
// client.get() returns a promise here; the request is only started, not completed.
client.get('search/tweets', {q: url, count:100, max_id: maxid})
// This callback runs asynchronously, after the surrounding synchronous code
// (including the `while` check below) has already finished.
.then(function(tweets){
console.log('next_results: ',tweets.search_metadata.next_results)
// Note: despite the label, this logs the size of the current batch only.
console.log('totalCount: ',tweets.statuses.length)
console.log(tweets.search_metadata)
totalCount += tweets.statuses.length
console.log(totalCount)
console.log(tweets.search_metadata.next_results == null)
if(tweets.search_metadata.next_results != null){
resultsExist = tweets.search_metadata.next_results
console.log('result is', resultsExist)
// Take the text between the first '=' and the first '&' as the next max_id.
// This relies on max_id being the first parameter in next_results.
isEqualsToLocation = resultsExist.indexOf('=');
andLocation = resultsExist.indexOf('&');
maxid = resultsExist.substring(isEqualsToLocation+1,andLocation);
console.log(maxid)
} else {
// No further page: resultsExist becomes null/undefined.
resultsExist = tweets.search_metadata.next_results
}
console.log(resultsExist == null)
})
}
// Evaluated immediately after the first client.get() call, before any response
// has arrived: resultsExist is still undefined, `undefined != null` is false,
// so the loop body executes exactly once.
while (resultsExist != null);
In your code, the client is created first, then the "do {" line executes, then the "client.get(..." line executes (which only starts the request), and then "while (resultsExist != null)" is evaluated. At that point resultsExist is still undefined, so the condition is false and the loop ends. Only afterwards, when the response comes back from Twitter, is the callback "function(tweets){" executed. That is why your do/while loop runs just once. I don't have a Twitter consumer key to test with, but the code below should work:
// dotenv is expected to populate process.env before the credentials are read.
require('dotenv').load();
var Twitter = require('twitter');
// API client configured from the TWITTER_* environment variables.
var client = new Twitter({
consumer_key: process.env.TWITTER_CONSUMER_KEY,
consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
bearer_token: process.env.TWITTER_BEARER_TOKEN
});
// Despite its name, `url` is the search term passed to getAllTwits as the query.
var url = 'apple';
// Left undefined on purpose: it is passed to getAllTwits as the starting max_id.
var max_id;
/**
 * Pages through the `search/tweets` endpoint and counts every tweet returned.
 *
 * Requests are issued one at a time with `await`, so each response's
 * `search_metadata.next_results` is known before deciding whether another
 * page has to be requested.
 *
 * @param {string} q - Search query sent as the `q` parameter.
 * @param {number} count - Page size sent as the `count` parameter.
 * @param {string|number} [max_id] - Optional `max_id` for the first request;
 *   when null/undefined the parameter is omitted from that request.
 * @returns {Promise<number>} Total number of tweets over all fetched pages.
 * @throws Rejects with whatever `client.get` rejects with.
 */
async function getAllTwits(q, count, max_id) {
  let totalCount = 0;
  let maxid = max_id;
  let hasMore;

  do {
    // Only send max_id once there is one, instead of passing `undefined`.
    const params = { q: q, count: count };
    if (maxid != null) {
      params.max_id = maxid;
    }

    const tweets = await client.get('search/tweets', params);
    const nextResults = tweets.search_metadata.next_results;

    console.log('next_results: ', nextResults);
    console.log('totalCount: ', tweets.statuses.length);
    console.log(tweets.search_metadata);
    totalCount += tweets.statuses.length;
    console.log(totalCount);
    console.log(nextResults == null);

    hasMore = false;
    if (nextResults != null) {
      console.log('result is', nextResults);
      // Look max_id up by name rather than by position in the query string,
      // and keep it as a string: tweet ids exceed Number.MAX_SAFE_INTEGER.
      const match = /[?&]\s*max_id=(\d+)/.exec(nextResults);
      const nextMaxId = match === null ? null : match[1];
      console.log(nextMaxId);
      // Stop when the cursor is missing or did not move; otherwise the same
      // request would be repeated forever.
      if (nextMaxId !== null && nextMaxId !== String(maxid)) {
        maxid = nextMaxId;
        hasMore = true;
      }
    }
    console.log(!hasMore);
  } while (hasMore);

  return totalCount;
}
// Start paging with no initial max_id. A failed request rejects the returned
// promise, so report it here instead of leaving the rejection unhandled.
getAllTwits(url, 100, max_id).catch(function (err) {
  console.error('Fetching tweets failed:', err);
  process.exitCode = 1;
});