Tags: r, web-scraping, shiny, twitter

Trying to create an R Shiny app that will crawl Twitter and create a word cloud


I've literally never used Shiny before, so I apologize if this is a really dumb question. I am trying to make a Shiny app in which you can input a search term that will be pulled from Twitter and create a word cloud. I feel like I'm ALMOST there, but it isn't working, and I honestly don't really know what I'm doing. I'm trying to teach Shiny to myself, but I also know that forums like this can be really useful for learning.

library(tm)
library(wordcloud)
library(memoise)



# UI definition: page title, a sidebar with the search controls, and a main
# panel that displays the word cloud.
ui <- fluidPage(
  titlePanel(title = "Word Cloud"),
  sidebarLayout(
    # Sidebar: search term, refresh button and the two word-cloud sliders
    sidebarPanel = sidebarPanel(
      textInput(
        inputId = "selection", label = "Input your search term:",
        value = ""
      ),
      actionButton(inputId = "update", label = "Change"),
      hr(),
      sliderInput(
        inputId = "freq", label = "Minimum Frequency:",
        min = 1, max = 50, value = 15
      ),
      sliderInput(
        inputId = "max", label = "Maximum Number of Words:",
        min = 1, max = 300, value = 100
      )
    ),
    # Main area: the rendered word cloud
    mainPanel = mainPanel(
      plotOutput(outputId = "plot")
    )
  )
)
#Define server logic

# Server logic as posted in the question. The code is left exactly as written
# because it is the code being asked about; the NOTE(review) comments mark the
# places that keep it from producing a word cloud.
server <- function(input, output, session) {
 # Define a reactive expression for the document term matrix
  terms <- reactive({
consumer_key <- "XXXX"
consumer_secret <- "XXXX"
access_token <- "XXXX"
access_secret <- "XXXX"
#Here we are creating the "handshake" with Twitter
# NOTE(review): setup_twitter_oauth() and searchTwitter() are not provided by
# tm, wordcloud or memoise, the only packages attached above -- presumably they
# come from twitteR, which is never loaded here; confirm.
setup_twitter_oauth(consumer_key= consumer_key, consumer_secret= 
consumer_secret,access_token= access_token, access_secret= access_secret)

# NOTE(review): the two lines below that do not start with "#" are the wrapped
# tails of the comments above them; without a leading "#" they are parsed as
# code.
#Once you have created your handshake, you can start searching for tweets
#Note that if you select a common term like "Atlanta" you will generate a lot 
of Tweets quickly
#But if you select an esoteric term like "heteroscedasticity", it might take 
a while to get any
# NOTE(review): "selection" is a string literal, so this searches for the word
# "selection" rather than for the value typed into input$selection.
tw<-searchTwitter("selection", n=1000, lang='en', resultType = "recent")
# Using "memoise" to automatically cache the results
# NOTE(review): the memoised function is re-created every time this reactive
# runs, so each run starts with a fresh, empty cache.
getTermMatrix <- memoise(function(tw) {
  # NOTE(review): readLines() reads from a file or connection, so the argument
  # is treated as a path here, not as the list of tweets fetched above.
  text <- readLines(sprintf(tw),
                    encoding="UTF-8")

  # Build a corpus and normalise it: lower-case, then strip punctuation,
  # numbers and stop words.
  myCorpus = Corpus(VectorSource(text))
  myCorpus = tm_map(myCorpus, content_transformer(tolower))
  myCorpus = tm_map(myCorpus, removePunctuation)
  myCorpus = tm_map(myCorpus, removeNumbers)
  myCorpus = tm_map(myCorpus, removeWords,
                    c(stopwords("SMART"), "thy", "thou", "thee", "the", 
"and", "but"))

  myDTM = TermDocumentMatrix(myCorpus,
                             control = list(minWordLength = 1))

  m = as.matrix(myDTM)

  # Total count per term, most frequent first
  sort(rowSums(m), decreasing = TRUE)
})

# Change when the "update" button is pressed...
input$update
# ...but not for anything else
isolate({
  withProgress({
    setProgress(message = "Processing corpus...")
    # NOTE(review): this passes the search term, not the tweets stored in `tw`.
    getTermMatrix(input$selection)

  })


})

 })
# Make the wordcloud drawing predictable during a session
# NOTE(review): this reactive calls itself (wordcloud_rep) rather than
# wordcloud(), and nothing in this function assigns output$plot, so the
# plotOutput("plot") declared in the UI is never given anything to draw.
 wordcloud_rep <- reactive({

 v <- terms()
 wordcloud_rep(names(v), v, scale=c(4,0.5),
            min.freq = input$freq, max.words=input$max,
            colors=brewer.pal(8, "Dark2"))

})
}

# Run the application 
shinyApp(ui = ui, server = server)

Thank you to anyone who tries to help!!

Edit: Ah, sorry, didn't clarify what was wrong! Thus far, it opens up an app with all the input boxes I want, but putting in a search term doesn't seem to actually do anything. It just loads forever. No errors. App Console


Solution

  • Hey, sorry for the first answer.

    The app works with the following code. I think you should work through a basic Shiny tutorial to completely understand everything; for example, outputs are connected to the UI in the same way as inputs: on the server, output$plot <- renderPlot({...}), and in the UI, plotOutput("plot"). I used observeEvent to check whether the update button was clicked. The slider input for the maximum number of words is also working, but not the one for the minimum frequency, because I could not work out what you are doing with it. I also only did basic word cleaning. I hope this helps:

    library(shiny)
    library(tm)
    library(wordcloud)
    library(memoise)
    # new libs
    library(twitteR)
    library(tidyverse)
    library(tidytext)
    
    
    # Page layout: title on top, search controls in the sidebar, and the
    # word-cloud plot in the main panel.
    ui <- fluidPage(
      titlePanel(title = "Word Cloud"),
      sidebarLayout(
        # Controls: search term, refresh button, and the two sliders
        sidebarPanel = sidebarPanel(
          textInput(
            inputId = "selection", label = "Input your search term:",
            value = ""
          ),
          actionButton(inputId = "update", label = "Change"),
          hr(),
          sliderInput(
            inputId = "freq", label = "Minimum Frequency:",
            min = 1, max = 50, value = 15
          ),
          sliderInput(
            inputId = "max", label = "Maximum Number of Words:",
            min = 1, max = 300, value = 100
          )
        ),
        # Output: the word cloud drawn by the server into "plot"
        mainPanel = mainPanel(
          plotOutput(outputId = "plot")
        )
      )
    )
    #Define server logic
    
    # Twitter API credentials are read from environment variables so that no
    # secret is written into the app source. Sys.getenv() yields "" for a
    # variable that is not set.
    api_key <- Sys.getenv(x = "twitter_api")
    api_secret <- Sys.getenv(x = "twitter_secret")
    access_token <- Sys.getenv(x = "twitter_access_token")
    access_secret <- Sys.getenv(x = "twitter_access_secret")
    
    # Server logic: when "Change" is clicked, fetch recent tweets for the search
    # term, count word frequencies, and draw them as a word cloud.
    #
    # input   - reads `selection` (search term), `update` (action button),
    #           `freq` (minimum word frequency) and `max` (maximum words drawn).
    # output  - fills `plot`, the plotOutput() declared in the UI.
    # session - unused.
    server <- function(input, output, session) {
        # Number of recent tweets requested per search. The sliders control
        # what is drawn, not how much is downloaded.
        n_tweets <- 1000

        # Word counts of the latest search; NULL until a search has succeeded.
        tweets_clean <- reactiveValues(df = NULL)

        # Here we are creating the "handshake" with Twitter
        setup_twitter_oauth(consumer_key = api_key,
                            consumer_secret = api_secret,
                            access_token = access_token,
                            access_secret = access_secret)

        # Query Twitter only when the button is pressed, never on slider moves.
        observeEvent(input$update, {
            term <- trimws(input$selection)
            # A blank query cannot be searched; leave the current plot alone.
            req(nzchar(term))

            # Report a failed search in the UI instead of letting the error
            # escape from the observer.
            tw <- tryCatch(
                searchTwitter(term, n = n_tweets, lang = "en",
                              resultType = "recent"),
                error = function(e) {
                    showNotification(
                        paste("Twitter search failed:", conditionMessage(e)),
                        type = "error"
                    )
                    NULL
                }
            )

            if (length(tw) == 0) {
                # twListToDF() errors on an empty list, so clear the plot.
                tweets_clean$df <- NULL
            } else {
                # Tweets -> data frame -> one row per word -> drop common
                # stop words -> frequency table, most frequent first.
                tweets_clean$df <- twListToDF(tw) %>%
                    dplyr::select(text) %>%
                    tidytext::unnest_tokens(word, text) %>%
                    dplyr::anti_join(tidytext::stop_words, by = "word") %>%
                    dplyr::count(word, sort = TRUE)
            }
        })

        output$plot <- renderPlot({
            counts <- tweets_clean$df
            # Nothing to draw until a search has produced word counts.
            req(counts, nrow(counts) > 0)
            validate(need(
                any(counts$n >= input$freq),
                "No word reaches the minimum frequency; lower the slider."
            ))
            # Both sliders are applied at draw time, so moving them redraws
            # the cloud without another Twitter request.
            wordcloud(counts$word, counts$n,
                      min.freq = input$freq, max.words = input$max)
        })

    }
    
    # Run the application: combine the UI definition and the server function
    # into a Shiny app object.
    shinyApp(ui = ui, server = server)