rplotlyr-markdown

Omit empty rows from plotly plot in R?


I'm working with plotly in RMarkdown to create interactive plots with dropdown menu list to filter years.

I'm having trouble trying to omit the empty categories from each year. I only want the top 5 categories for each year. This is the code I'm working with:

cid <- import("R:/COE/GIE/0 SERVIDORES/Lana Meijinhos/CID-10-SUBCATEGORIAS.CSV") %>%
  select(SUBCAT, DESCRABREV)

malf <- nv %>%
  filter(idanomal == "1")

malf$codanomal <- gsub("(.{4})", "*\\1", malf$codanomal)

malf_cid <- malf %>%
  separate_rows(codanomal, sep = "\\*") %>%  
  filter(codanomal != "") %>%  
  mutate(codanomal = ifelse(codanomal == "Q699", "Q690", codanomal)) %>%
  group_by(anonasc, codanomal) %>% 
  summarise(frequency = n(), .groups = "drop") %>%  
  arrange(anonasc, desc(frequency)) %>%
  spread(., key=anonasc, value=frequency) %>%
  adorn_totals("col") 

malf_cid[is.na(malf_cid)] <- 0  

res <- nv %>%
  group_by(anonasc) %>%
  tally %>%
  spread(., key=anonasc, value=n) 

colnames(res) <- paste0("nv", colnames(res))

malf_cid <- bind_cols(malf_cid, res)

malf_cid$tx20 <- round((malf_cid$"2020"/res$nv2020)*1000,1)
malf_cid$tx21 <- round((malf_cid$"2021"/res$nv2021)*1000,1)
malf_cid$tx22 <- round((malf_cid$"2022"/res$nv2022)*1000,1)
malf_cid$tx23 <- round((malf_cid$"2023"/res$nv2023)*1000,1)
malf_cid$tx24 <- round((malf_cid$"2024"/res$nv2024)*1000,1)

malf_cid <- malf_cid %>%
  left_join(cid, by = c("codanomal" = "SUBCAT")) %>%
  select(-codanomal) %>%
  rename("codanomal" = "DESCRABREV") %>%
  arrange(desc(Total)) 

graf <- malf_cid %>%
  select(codanomal, "2020":"2024") %>%
  pivot_longer(cols = starts_with("20"), names_to = "year", values_to = "n")

graf2 <- malf_cid %>%
  select(codanomal, tx20:tx24) %>%
  pivot_longer(cols = starts_with("tx"), names_to = "year", values_to = "frequency")

graf2$year <- ifelse(graf2$year == "tx20", "2020",
                     ifelse(graf2$year == "tx21", "2021",
                            ifelse(graf2$year == "tx22", "2022",
                                   ifelse(graf2$year == "tx23", "2023",
                                          ifelse(graf2$year == "tx24", "2024", NA)))))
graf3 <- malf_cid %>%
  select(codanomal, nv2020:nv2024) %>%
  pivot_longer(cols = starts_with("nv"), names_to = "year", values_to = "nv")

graf3$year <- ifelse(graf3$year == "nv2020", "2020",
                     ifelse(graf3$year == "nv2021", "2021",
                            ifelse(graf3$year == "nv2022", "2022",
                                   ifelse(graf3$year == "nv2023", "2023",
                                          ifelse(graf3$year == "nv2024", "2024", NA)))))

graf <- graf %>%
  left_join(graf2, by = c("year", "codanomal")) %>%
  left_join(graf3, by = c("year", "codanomal"))

graf <- graf %>%
  group_by(year) %>%
  mutate(codanomal = reorder(codanomal, frequency)) %>%
  ungroup() %>%
  mutate(frequency = ifelse(frequency == 0, NA, frequency),
         n = ifelse(n == 0, NA, n),
         nv = ifelse(is.na(n), NA,  nv),
         year = ifelse(is.na(n), NA,  year))

fig <- plot_ly() %>%
  add_trace(data = graf %>% filter(year == "2020" & frequency > 0) %>% arrange(desc(frequency)) %>% slice(1:5),
            x = ~frequency,
            y = ~codanomal,
            type = 'bar',
            name = '2020',
            hoverinfo = 'text',
            textposition = "none",
            text = ~paste('</br> Ano do Nascimento: ', year,
                          '</br> Causa: ', codanomal,
                          '</br> Número de Anomalias: ', n,
                          '</br> Número de Nascidos Vivos: ', nv,
                          '</br> Prevalência: ', frequency),
            visible = TRUE) %>%
  add_trace(data = graf %>% filter(year == "2021" & frequency > 0)  %>% arrange(desc(frequency)) %>% slice(1:5),
            x = ~frequency,
            y = ~codanomal,
            type = 'bar',
            name = '2021',
            hoverinfo = 'text',
            textposition = "none",
            text = ~paste('</br> Ano do Nascimento: ', year,
                          '</br> Causa: ', codanomal,
                          '</br> Número de Anomalias: ', n,
                          '</br> Número de Nascidos Vivos: ', nv,
                          '</br> Prevalência: ', frequency),
            visible = FALSE) %>%
  add_trace(data = graf %>% filter(year == "2022" & frequency > 0)  %>% arrange(desc(frequency)) %>% slice(1:5),
            x = ~frequency,
            y = ~codanomal,
            type = 'bar',
            name = '2022',
            hoverinfo = 'text',
            textposition = "none",
            text = ~paste('</br> Ano do Nascimento: ', year,
                          '</br> Causa: ', codanomal,
                          '</br> Número de Anomalias: ', n,
                          '</br> Número de Nascidos Vivos: ', nv,
                          '</br> Prevalência: ', frequency),
            visible = FALSE) %>%
  add_trace(data = graf %>% filter(year == "2023" & frequency > 0)  %>% arrange(desc(frequency)) %>% slice(1:5),
            x = ~frequency,
            y = ~codanomal,
            type = 'bar',
            name = '2023',
            hoverinfo = 'text',
            textposition = "none",
            text = ~paste('</br> Ano do Nascimento: ', year,
                          '</br> Causa: ', codanomal,
                          '</br> Número de Anomalias: ', n,
                          '</br> Número de Nascidos Vivos: ', nv,
                          '</br> Prevalência: ', frequency),
            visible = FALSE) %>%
  add_trace(data = graf %>% filter(year == "2024" & frequency > 0)  %>% arrange(desc(frequency)) %>% slice(1:5),
            x = ~frequency,
            y = ~codanomal,
            type = 'bar',
            name = '2024',
            hoverinfo = 'text',
            textposition = "none",
            text = ~paste('</br> Ano do Nascimento: ', year,
                          '</br> Causa: ', codanomal,
                          '</br> Número de Anomalias: ', n,
                          '</br> Número de Nascidos Vivos: ', nv,
                          '</br> Prevalência: ', frequency),
            visible = FALSE) %>%
  layout(width = 820,
         yaxis = list(title = " ", linecolor = 'black'),
         xaxis = list(side = 'bottom', title = 'Prevalência de Malformação Congênita (/1.000 nascidos vivos)', showgrid = F, zeroline = T,
                      linecolor = 'black', range = c(0, max(graf$frequency)+2)),
         colorway = c("#4567a9", "#118dff", "#107dac", "#1ebbd7", "#064273"),
         showlegend = F,
         margin = list(l = 0, r = 0, b = 0, t = 0, pad = 0),  # Adjusted to remove margins
         xaxis = list(
           showline = TRUE,  # Added to show x-axis line
           showgrid = FALSE   # Added to hide x-axis grid
         ),
         updatemenus = list(
           list(
             buttons = list(
               list(method = "restyle",
                    args = list("visible", list(TRUE, FALSE, FALSE, FALSE, FALSE)),
                    label = "2020"),
               list(method = "restyle",
                    args = list("visible", list(FALSE, TRUE, FALSE, FALSE, FALSE)),
                    label = "2021"),
               list(method = "restyle",
                    args = list("visible", list(FALSE, FALSE, TRUE, FALSE, FALSE)),
                    label = "2022"),
               list(method = "restyle",
                    args = list("visible", list(FALSE, FALSE, FALSE, TRUE, FALSE)),
                    label = "2023"),
               list(method = "restyle",
                    args = list("visible", list(FALSE, FALSE, FALSE, FALSE, TRUE)),
                    label = "2024")
             ),
             direction = "down",
             pad = list(r = 10, t = 10),
             showactive = TRUE,
             x = -0.4,
             xanchor = "left",
             y = 1.1,
             yanchor = "top"
           )
         )
  )

fig

And this is the plot it generates for 2024, for example:

enter image description here

I only want to keep the categories that have actually bars and omit the empty ones. I've tried everything but nothing seems to work.

Any tips?


Solution

  • There are a few ways you can implement this, but since you used visibility with your buttons, we'll start with that method.

    If I wanted to make the first button show only those entries in y that are not empty, I have to define the categoryarray, that's within layout(yaxis = list(categoryarray =....

    The category array is a list of each unique value you have listed on the y axis -- your codanomal.

    Visibility and the Category Array

    The control of visibility is in the purview of restyle, as you identified in your buttons. However, categoryarray is under relayout, so in order to update both, you need to use the method update.

    The main difference when using update, versus one of the others is that you need at least two lists within args: one list for elements to restyle and one list for elements to relayout. There can, of course, be many lists nested within.

    The args will looks like this:

    args = list(list(visible = ...),
                list(yaxis = list(categoryarray = ...)))
    

    Or in terms of your code, it would look something like this (where dta is the data):

    args = list(list("visible", list(TRUE, FALSE, FALSE, FALSE, FALSE)),
                list(yaxis = list(categoryarray = unique(dta[dta$year == 2020,]$codanomal))))
    
    

    More details specific to your code is at the end of this answer.

    Without a reproducible question, I can't necessarily give you an exact answer. To make things simpler, I've created some simple data to use as an example to emulate what you've got going on there on a much smaller scale.

    library(tidyverse)
    library(plotly)
    
    set.seed(35446)
    dta <- data.frame(
      animals = sample(c("penguin", "dolphin", "dolphin", "horse", "cat"), 25, replace = T),
      consumed = sample(10:1000, 100, replace = T),
      year = sample(2020:2024, 100, replace = T)
    ) %>% arrange(year, desc(animals))   # just for plotly -- keep it in order! (sigh)
    

    I could plot each year separately, but I would only do that if I never want the graph to show all years at the same time. If I wanted to only show one year at time, I would use add_trace.

    In this example, plot_ly() is the trace that is visible, where the add_trace() houses the rest of the data, that is not visible. The reason this still works with visible as a button argument is because of the argument split.

    plot_ly(filter(dta, year == min(dta$year)), type = "bar", visible = T,
            x = ~consumed, y = ~animals, split = ~year, showlegend = F) %>% 
      add_trace(inherit = F, 
                data = filter(dta, year != min(dta$year)), type = "bar", visible = F,
                x = ~consumed, y = ~animals, split = ~year, showlegend = F)
    

    In order to make buttons I could write out each one, but I don't have to.

    I'm going to use purrr's imap() so that I get both a what I'm sequencing and the iteration number. I'm going to sequence the years, because that's how the buttons are splitting the data.

    I know that I have 5 traces (typically 5 colors == 5 traces). That means I need visibility assigned for each one (as you have done in your buttons). I also need the categoryarray for each button.

    btns <- imap(unique(sort(dta$year)), \(j, k) {
      vis <- rep(F, 5)             # create an array of F for each trace
      vis[k] <- T                  # change the current iteration to TRUE
      dtb <- dta %>% filter(year == j)   # identify the trace data
      list(method = "update", label = as.character(j), # year as the label
           args = list(list(visible = as.list(vis)),   # visibility;    restyle args
                       # only the categories on this data;              relayout args
                       list(yaxis = list(categoryarray = unique(dtb$animals))))
      )
    })
    

    Next is the assembly of the plot and buttons.

    plot_ly(filter(dta, year == min(dta$year)), type = "bar", visible = T,
            x = ~consumed, y = ~animals, split = ~year, showlegend = F) %>% 
      add_trace(inherit = F, 
                data = filter(dta, year != min(dta$year)), type = "bar", visible = F,
                x = ~consumed, y = ~animals, split = ~year, showlegend = F) %>% 
      layout(updatemenus = list(list(buttons = btns)))
    
    

    2020 2023

    BTW:

    The manner in which you prepared your data in your code looks as if you could also summarize the data as follows, where consumed in this data represents frequency in yours.

    dta %>% filter(consumed > 0) %>% group_by(year) %>% 
      arrange(desc(consumed)) %>% slice(1:5)
    

    Using Your Code

    As I said, without a reproducible question, I can't be certain this next bit of code is going to do what I expect. However, I'm pretty confident this could replace all of your plotly calls

    That being said, I left out your call for hovertext. What are you expecting this to do? Is it doing what you expected? I've replaced this with hovertemplate and what I think you wanted in the hover content.

    dta <- filter(graf, frequency > 0) %>% group_by(year) %>% 
      arrange(desc(frequency)) %>% slice(1:5)
    
    btns <- imap(unique(sort(dta$year)), \(j, k) {
      vis <- rep(F, 5)             # create an array of F for each trace
      vis[k] <- T                  # change the current iteration to TRUE
      dtb <- dta %>% filter(year == j)   # identify the trace data
      list(method = "update", label = as.character(j), # year as the label
           args = list(list(visible = as.list(vis)),   # visibility;    restyle args
                       # only the categories on this data;              relayout args
                       list(yaxis = list(categoryarray = unique(dtb$codanomal))))
      )
    })
    
    plot_ly(type = "bar", 
            data = filter(dta, year == min(dta$year)),
            name = ~year, x = ~frequency, y = ~codanomal,
            customdata = ~pmap(list(year, n, nv), list)                    # connect the data
            hovertemplate = paste0('Ano do Nascimento: %{customdata[0]}',  # year
                                   '</br> Causa: %{y}',                    # codanomal in {y}
                                   '</br> Número de Anomalias: %{customdata[1]}',      # n
                                   '</br> Número de Nascidos Vivos: %{customdata[2]}', # nv
                                   '</br> Prevalência: %{x}'),             # frequency in {x}
            visible = T) %>% 
      add_trace(inherit = F, type = "bar",
                data = filter(dta, year != min(dta$year)),
                name = ~year, x = ~frequency, y = ~codanomal,
                customdata = ~pmap(list(year, n, nv), list)                    # connect the data
                hovertemplate = paste0('Ano do Nascimento: %{customdata[0]}',  # year
                                       '</br> Causa: %{y}',                    # codanomal in {y}
                                       '</br> Número de Anomalias: %{customdata[1]}',      # n
                                       '</br> Número de Nascidos Vivos: %{customdata[2]}', # nv
                                       '</br> Prevalência: %{x}'),             # frequency in {x}
                visible = T) %>% 
      layout(xaxis = list(title = 'Prevalência de Malformação Congênita (/1.000 nascidos vivos)', 
                          showgrid = F, showline = T),
             yaxis = list(showgrid = F, showline = T),
             colorway = c("#4567a9", "#118dff", "#107dac", "#1ebbd7", "#064273"),
             showlegend = F,
             margin = list(0),
             updatemenus = list(list(
               buttons = btns,
               direction = "down",
               pad = list(r = 10, t = 10), showactive = TRUE, 
               x = -0.4, xanchor = "left", y = 1.1, yanchor = "top"
             ))
      )