I'm currently creating a shiny app that load recent Dutch tweets concerning the corona virus, and on another tab I want to display a wordcloud with the most frequently used words.
The table works fine, but the wordcloud shows mainly chinese signs. I was thinking that it may be smileys used in the tweets, but that doesn't seem to be the case.
The code that i've written:
library(tidyverse)
library(shiny)
library(rtweet)
library(dplyr)
library(glue)
library(reactable)
library(purrr)
library(wordcloud2)
library(tidytext)
library(tm)
make_url_html <- function(url) {
if(length(url) < 2) {
if(!is.na(url)) {
as.character(glue("<a title = {url} target = '_new' href = '{url}'>{url}</a>") )
} else {
""
}
} else {
paste0(purrr::map_chr(url, ~ paste0("<a title = '", .x, "' target = '_new' href = '", .x, "'>", .x, "</a>", collapse = ", ")), collapse = ", ")
}
}
# UI page instellen
ui <- fluidPage(
titlePanel("Corona op twitter"),
h4("Meest gebruikte woorden omtrent populaire COVID-19 hashtags op de Nederlandse twitter"),
tabsetPanel(
#Eerste tab bevat de twitter tabel
tabPanel(
title = "Zoek tweets",
sidebarLayout(
sidebarPanel(
# Radiobuttons voor de hastags
radioButtons(
inputId = "hashtag_to_search",
label = "Kies hashtag",
choices = c("#coronavirus" = "#coronavirus", "#coronahulp" = "#coronahulp")
),
#Slider voor het aantal tweets
sliderInput("num_tweets_to_download",
"Aantal tweets:",
min = 1,
max = 100,
value = 50)
),
mainPanel(
reactableOutput("tweet_table")
)
)
),
tabPanel(
# Tweede tab bevat de wordcloud
title = "Wordcloud",
sidebarLayout(
sidebarPanel(
radioButtons(
inputId = "hashtag",
label = "Choose hashtag",
choices = c("#coronavirus" = "virus", "#coronahulp" = "hulp")
),
sliderInput("num",
"Number of words:",
min = 1,
max = 100,
value = 50)
),
# Show a plot of the generated distribution
mainPanel(
wordcloud2Output("cloud", width = "100%", height = "800px"),
reactableOutput("table")
)
)
)
)
)
# Server met tabel en wordcloud
server <- function(input, output) {
# Data inladen
tweet_df <- reactive({
search_tweets(paste("lang:nl", input$hashtag_to_search), n = input$num_tweets_to_download, include_rts = FALSE)
})
# data schoonmaken
word <- c("we", "coronavirus", "nl", "nederland", "https", stopwords("nl"))
new_stopwords_df <- data.frame(word)
tweet_clean <- reactive({
req(tweet_df())
tweet_df() %>%
mutate(text = lapply(text, tolower),
text = str_replace_all(text, "https://t.co/[a-z,A-Z,0-9]*", ""),
text = str_replace(text,"RT @[a-z,A-Z,0-9,_]*: ",""),
text = str_replace_all(text,"#[a-z,A-Z]*",""),
text = str_replace_all(text,"@[a-z,A-Z]*",""),
text = str_replace_all(text,"\\b[a-zA-Z]{1}\\b",""),
text = str_replace_all(text,"[:digit:]",""),
text = str_replace_all(text,"[^[:alnum:] ]",""),
text = str_replace_all(text," "," ")) %>%
select(status_id, text) %>% unnest_tokens(word,text) %>%
anti_join(new_stopwords_df, by = "word") %>% drop_na(word)
})
tweet_clean_freq <- reactive({
req(tweet_clean())
tweet_clean() %>%
group_by(word) %>%
summarise(freq =n()) %>%
arrange(desc(freq)) %>%
head(data, n = 50)
})
output$table <- renderReactable({reactable(tweet_clean())})
output$cloud <- renderWordcloud2({
wordcloud2(data = tweet_clean_freq()
)
})
# Tabel
tweet_table_data <- reactive({
req(tweet_df())
tweet_df() %>%
select(user_id, status_id, created_at, screen_name, text, favorite_count, retweet_count, urls_expanded_url) %>%
mutate(
Tweet = glue::glue("{text} <a href='https://twitter.com/{screen_name}/status/{status_id}'>>> </a>"),
URLs = purrr::map_chr(urls_expanded_url, make_url_html)
)%>%
select(DateTime = created_at, User = screen_name, Tweet, Likes = favorite_count, RTs = retweet_count, URLs)
})
output$tweet_table <- renderReactable({
reactable::reactable(tweet_table_data(),
filterable = TRUE, searchable = TRUE, bordered = TRUE, striped = TRUE, highlight = TRUE,
showSortable = TRUE, defaultSortOrder = "desc", defaultPageSize = 25, showPageSizeOptions = TRUE, pageSizeOptions = c(25, 50, 75, 100, 200),
columns = list(
DateTime = colDef(defaultSortOrder = "asc"),
User = colDef(defaultSortOrder = "asc"),
Tweet = colDef(html = TRUE, minWidth = 190, resizable = TRUE),
Likes = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
RTs = colDef(filterable = FALSE, format = colFormat(separators = TRUE)),
URLs = colDef(html = TRUE)
)
)
})
}
# Applicatie
shinyApp(ui = ui, server = server)
I've tried to check what the problem is by adding a table under the wordcloud, but there it also shows chinese symbols. When I try my code outside of shiny context (and without reactive aspects), it seems to work fine.
Btw: I know I've not connected the radiobuttons yet, I want to get the wordcloud working first.
Thanks!
Found the problem, I didn't remove emoticons from the text.
I added this line of code;
text = sapply(text,function(row) iconv(row, "latin1", "ASCII", sub="")))
to the mutate function and that solved the issue.