rbioconductor

Mutation/Lollipop plot using Trackviewer package, using the EBI Proteins REST API


I am looking to plot different variants in a gene on R/RStudio. I am ultimately looking to plot different mutations/variants inside the gene, and do it on both sides to compare new findings.

I was using the g3viz package, but since I needed to use different colours to better map functional domain, I switched to the TrackViewer package.

I am using this particular example and modelling my code on the example of pulling the EMBL-EBI API on here, but I keep hitting this error about the organism ID not being found and I am unsure of how to rectify this.

When I skipped those two lines of code and ran it again.

library(httr) # load library to get data from REST API
APIurl <- "https://www.ebi.ac.uk/proteins/api/" # base URL of the API
taxid <- "9606" # human tax ID
gene <- "TP53" # target gene
orgDB <- "org.Hs.eg.db" # org database to get the uniprot accession id
eid <- mget("TP53", get(sub(".db", "SYMBOL2EG", orgDB)))[[1]]

**Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'envir' in selecting a method** for function 'mget': object 'org.Hs.egSYMBOL2EG' not found

Full code segment from below link

library(httr) # load library to get data from REST API
APIurl <- "https://www.ebi.ac.uk/proteins/api/" # base URL of the API
taxid <- "9606" # human tax ID
gene <- "TP53" # target gene
orgDB <- "org.Hs.eg.db" # org database to get the uniprot accession id
eid <- mget("TP53", get(sub(".db", "SYMBOL2EG", orgDB)))[[1]]
chr <- mget(eid, get(sub(".db", "CHR", orgDB)))[[1]]
accession <- unlist(lapply(eid, function(.ele){
  mget(.ele, get(sub(".db", "UNIPROT", orgDB)))
}))
stopifnot(length(accession)<=20) # max number of accession is 20

tryCatch({ ## in case the internet connection does not work
  featureURL <- paste0(APIurl, 
                       "features?offset=0&size=-1&reviewed=true",
                       "&types=DNA_BIND%2CMOTIF%2CDOMAIN",
                       "&taxid=", taxid,
                       "&accession=", paste(accession, collapse = "%2C")
  )
  response <- GET(featureURL)
  if(!http_error(response)){
    content <- content(response)
    content <- content[[1]]
    acc <- content$accession
    sequence <- content$sequence
    gr <- GRanges(chr, IRanges(1, nchar(sequence)))
    domains <- do.call(rbind, content$features)
    domains <- GRanges(chr, IRanges(as.numeric(domains[, "begin"]),
                                     as.numeric(domains[, "end"]),
                                     names = domains[, "description"]))
    names(domains)[1] <- "DNA_BIND" ## this is hard coding.
    domains$fill <- 1+seq_along(domains)
    domains$height <- 0.04
    ## GET variations. This part can be replaced by user-defined data.
    variationURL <- paste0(APIurl,
                           "variation?offset=0&size=-1",
                           "&sourcetype=uniprot&dbtype=dbSNP",
                           "&taxid=", taxid,
                           "&accession=", acc)
    response <- GET(variationURL)
    if(!http_error(response)){
      content <- content(response)
      content <- content[[1]]
      keep <- sapply(content$features, function(.ele) length(.ele$evidences)>2 && # filter the data by at least 2 evidences
                       !grepl("Unclassified", .ele$clinicalSignificances)) # filter the data by classified clinical significances.
      nkeep <- c("wildType", "alternativeSequence", "begin", "end",
                 "somaticStatus", "consequenceType", "score")
      content$features <- lapply(content$features[keep], function(.ele){
        .ele$score <- length(.ele$evidences)
        unlist(.ele[nkeep]) 
      })
      variation <- do.call(rbind, content$features)
      variation <- 
        GRanges(chr, 
                IRanges(as.numeric(variation[, "begin"]),
                        width = 1,
                        names = paste0(variation[, "wildType"],
                                       variation[, "begin"],
                                       variation[, "alternativeSequence"])),
                score = as.numeric(variation[, "score"]),
                color = as.numeric(factor(variation[, "consequenceType"]))+1)
      variation$label.parameter.gp <- gpar(cex=.5)
      lolliplot(variation, domains, ranges = gr, ylab = "# evidences", yaxis = FALSE)
    }else{
      message("Can not get variations. http error")
    }
  }else{
    message("Can not get features. http error")
  }
},error=function(e){
  message(e)
},warning=function(w){
  message(w)
},interrupt=function(i){
  message(i)
})
**ERROR: unable to find an inherited method for function 'content' for signature '"response"'**

LINK TO THE PACKAGE: https://bioconductor.org/packages/devel/bioc/vignettes/trackViewer/inst/doc/lollipopPlot.html


Solution

  • So, this worked for me and produces an mutation plot that I can work with!

    library(httr) # load library to get data from REST API
        library(org.Hs.eg.db)
        library(trackViewer)
        APIurl <-"https://www.ebi.ac.uk/proteins/api/" # base URL of the API
        taxid <- "9606" # human tax ID
        gene <- "PCDH19" # target gene
        orgDB <- "org.Hs.eg.db" # 
    org database to get the uniprot accession id
    eid <- mget("PCDH19", get(sub(".db", "SYMBOL2EG", orgDB)))[[1]]
    chr <- mget(eid, get(sub(".db", "CHR", orgDB)))[[1]]
    accession <- unlist(lapply(eid, function(.ele){
      mget(.ele, get(sub(".db", "UNIPROT", orgDB)))
      
    }))
    stopifnot(length(accession)<=20) # max number of accession is 20
    
    tryCatch({ ## in case the internet connection does not work
      
      featureURL <- paste0(APIurl,
                           
                           "features?offset=0&size=-1&reviewed=true",
                           
                           "&types=DNA_BIND%2CMOTIF%2CDOMAIN",
                           
                           "&taxid=", taxid,
                           
                           "&accession=", paste(accession, collapse = "%2C")
                           
      )
      response <- GET(featureURL)
      
      if(!http_error(response)){
        
        content <- httr::content(response)
        
        content <- content[[1]]
        
        acc <- content$accession
        
        sequence <- content$sequence
        
        gr <- GRanges(chr, IRanges(1, nchar(sequence)))
        
        domains <- do.call(rbind, content$features)
        
        domains <- GRanges(chr, IRanges(as.numeric(domains[, "begin"]),
                                        
                                        as.numeric(domains[, "end"]),
                                        
                                        names = domains[, "description"]))
        
        names(domains)[1] <- "" ## this is hard coding.
        
        domains$fill <- 1+seq_along(domains)
        
        domains$height <- 0.04
        
        ## GET variations. This part can be replaced by user-defined data.
        
        variationURL <- paste0(APIurl,
                               
                               "variation?offset=0&size=-1",
                               
                               "&sourcetype=uniprot&dbtype=dbSNP",
                               
                               "&taxid=", taxid,
                               
                               "&accession=", acc)
        
        response <- GET(variationURL)
        
        if(!http_error(response)){
          
          content <- httr::content(response)
          
          content <- content[[1]]
          
          keep <- sapply(content$features, function(.ele) length(.ele$evidences)>2 && # filter the data by at least 2 evidences
                           
                           !grepl("Unclassified", .ele$clinicalSignificances)) # filter the data by classified clinical significances.
          
          nkeep <- c("wildType", "alternativeSequence", "begin", "end",
                     
                     "somaticStatus", "consequenceType", "score")
          
          content$features <- lapply(content$features[keep], function(.ele){
            
            .ele$score <- length(.ele$evidences)
            
            unlist(.ele[nkeep])
            
          })
          
          variation <- do.call(rbind, content$features)
          
          variation <-
            
            GRanges(chr,
                    
                    IRanges(as.numeric(variation[, "begin"]),
                            
                            width = 1,
                            
                            names = paste0(variation[, "wildType"],
                                           
                                           variation[, "begin"],
                                           
                                           variation[, "alternativeSequence"])),
                    
                    score = as.numeric(variation[, "score"]),
                    
                    color = as.numeric(factor(variation[, "consequenceType"]))+1)
          
          variation$label.parameter.gp <- gpar(cex=.5)
          
          lolliplot(variation, domains, ranges = gr, ylab = "# evidences", yaxis = FALSE)
          
        }else{
          
          message("Can not get variations. http error")
          
        }
        
      }else{
        
        message("Can not get features. http error")
        
      }
      
    },error=function(e){
      
      message(e)
      
    },warning=function(w){
      
      message(w)
      
    },interrupt=function(i){
      
      message(i)
      
    })