python twitter tweepy twitterapi-python twitter-api-v2

How to retrieve more than 100 tweets with the Twitter API v2 using Python

I upgraded my access to the Basic tier, which allows me to retrieve 10,000 tweets per month. I am trying to collect 3000 tweets for a particular keyword. My code is below, but I get an error saying that the maximum I can request is 100. Is there a way to automate this so that I can reach my goal? Thank you very much for your help.

import tweepy
from textblob import TextBlob
from wordcloud import WordCloud
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import re

# Build a tweepy.Client from the credentials. The credential variables
# (bearerToken, consumerKey, ...) are not defined in this snippet and must
# already exist in the surrounding script.
client = tweepy.Client(
    bearer_token=bearerToken,
    consumer_key=consumerKey,
    consumer_secret=consumerSecret,
    access_token=accessToken,
    access_token_secret=accessTokenSecret
)
# This is the request the question reports as failing: asking for 3316
# results in a single call is rejected with a "maximum is 100" error.
response = client.search_recent_tweets(query='nurofen', max_results=3316)
# Print the metadata that came back with the response.
print(response.meta)

# The tweet objects themselves are carried in response.data.
tweets = response.data

# One-column DataFrame ('Tweets') holding the raw text of each tweet.
nurofen_twts = pd.DataFrame([tweet.text for tweet in tweets], columns=['Tweets'])

I tried changing my code, but this time I was getting a 403 error that said I needed to upgrade my access level. The code is below:

# Authenticate with Twitter API using the consumer key/secret pair and the
# user's access token/secret (all four variables are defined elsewhere).
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)

# Create API client
# NOTE(review): tweepy.API appears to be Tweepy's wrapper for the older v1.1
# endpoints rather than API v2 -- presumably why this attempt is answered
# with the 403 "upgrade your access level" error; confirm against the
# Tweepy documentation.
client = tweepy.API(auth)


# Define the function to clean the tweets
def clean_text(text: str) -> str:
    """Strip Twitter-specific noise from a tweet's text.

    Removes, in this order: @mentions, '#' characters (the hashtag word
    itself is kept), the retweet marker "RT" together with the whitespace
    after it, and http/https URLs. Whitespace around removed items is not
    collapsed, so the result can contain leading or doubled spaces.

    Args:
        text: Raw tweet text.

    Returns:
        The text with the items listed above removed.
    """
    # Handles may contain underscores; without '_' in the class a mention
    # such as '@john_doe' would leave '_doe' behind.
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    # Drop only the '#' sign so the hashtag's word stays in the text.
    text = re.sub(r'#', '', text)
    # \b restricts the match to a standalone "RT" token, so words that merely
    # end in "RT" (e.g. "SMART people") are left untouched.
    text = re.sub(r'\bRT\s+', '', text)
    text = re.sub(r'https?://\S+', '', text)
    return text


# Variables
query = 'nurofen'      # search term
total_tweets = 3316    # overall number of tweets wanted
max_results = 100      # number of tweets asked for per request
tweets = []            # accumulates the tweets collected so far

# Fetch tweets
# Keeps requesting until total_tweets have been collected or the response
# carries no next_token.
# NOTE(review): the keyword names (max_results, next_token) and the
# response.data / response.meta attributes look like tweepy.Client (API v2)
# conventions; confirm that the `client` object used here supports them.
while len(tweets) < total_tweets:
    # Shrink the final request so it does not ask for more than is still needed.
    if total_tweets - len(tweets) < max_results:
        max_results = total_tweets - len(tweets)
    # NOTE: no next_token is passed here, so every iteration repeats the same
    # unpaginated request.
    response = client.search_tweets(q=query, max_results=max_results)

    tweets.extend(response.data)
    if response.meta.next_token:
        next_token = response.meta.next_token
        # NOTE: the page fetched here is never added to `tweets`; `response`
        # is reassigned at the top of the next iteration before it is read.
        response = client.search_tweets(q=query, max_results=max_results, next_token=next_token)
    else:
        # No further page is available.
        break


# Put the cleaned text of every collected tweet into a single 'Tweets' column
nurofen_twts = pd.DataFrame(
    list(map(lambda status: clean_text(status.text), tweets)),
    columns=['Tweets'],
)

# Show the resulting table
print(nurofen_twts)

Solution

Use a loop:

import math

num = 3316
# Round up to the next multiple of 100 so that a partial last page is still
# requested: 3316 becomes 3400, while 3300 stays 3300.
num = math.ceil(num / 100) * 100

tweets = []
until_id = None  # no upper bound on the tweet ID for the first request
# One request per 100 tweets: range(0, num, 100) yields exactly num / 100
# iterations (34 for 3400), so no extra request is sent.
for _ in range(0, num, 100):
    response = client.search_recent_tweets(query='nurofen', max_results=100, until_id=until_id)
    print(response.meta)

    tweets_chunk = response.data
    # Stop early once a page comes back without tweets; extending the list
    # with it or indexing [-1] would fail.
    if not tweets_chunk:
        break

    tweets.extend(tweets_chunk)

    # The next request should only return tweets with an ID smaller than the
    # last one received (assumes each page is ordered newest-first).
    until_id = tweets_chunk[-1].id

It requests 100 tweets, stores the ID of the last tweet received, and then requests the next 100 tweets whose IDs are smaller than the stored ID (i.e. older tweets), repeating until it has requested all the tweets you need.

The requested count is rounded up to the next multiple of 100: an input of 3300 stays at 3300, while an input of 3301 is rounded up to 3400.