I am trying to run an R script on the command line of bash (I am using CentOS 8) with the command:
cat 1_myScript.R | R --slave --args $SAMPLE"_x" $SAMPLE"_y"
where $SAMPLE is an argument that I have specified in the R script as follows
> args<-commandArgs()
> aaa<-args[4]
This syntax always worked for all of my scripts, but now it gives me the following error:
Error in as.vector(x) : no method for coercing this S4 class to a vector
Calls: setdiff -> setdiff.default -> -> as.vector
Execution halted
the strange thing is that if I try to run this script in the R console
>source("1_myScript.R")
it proceeds with no errors. I have looked up and it appears to be a feature linked to the library "GenomicRanges", which I used in the script. Here is the body of my script (note that I don't know the exact line where it fails):
#!/usr/bin/env Rscript
library(rtracklayer)
library(data.table)
library(tidyverse)
#args <- commandArgs()
#aaa<-args[4]
aaa<-gsub("_T.finalSorted.bam_CNVs","",args[1])
targDir<-"/srv/ngsdata/dalteriog/SV_analysis/example/WGS_NB_novogene/CNVDir/"
dataTable <-fread((paste0(targDir,args[2]), header=TRUE)
ratio<-data.frame(dataTable)
dataTable <-fread(paste0(targDir,args[1]), header=FALSE)
cnvs<- data.frame(dataTable)
ratio$Ratio[which(ratio$Ratio==-1)]=NA
cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3])) # primo ogetto del GRange obj --> chr; secondo --> range
ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio) # score è un metadata
overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed) # regioni overlappanti i due df
normals <- setdiff(ratio.bed,cnvs.bed) # regioni diverse
normals <- subsetByOverlaps(ratio.bed,normals) # la stessa cosa, ma con lo score associato
#mu <- mean(score(normals),na.rm=TRUE)
#sigma<- sd(score(normals),na.rm=TRUE)
#hist(score(normals),n=500,xlim=c(0,2))
#hist(log(score(normals)),n=500,xlim=c(-1,1))
#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)
#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)
numberOfCol=length(cnvs)
for (i in c(1:length(cnvs[,1]))) {
values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i])) #score bayesiano della iesima CNV che overlappa con il file ratio
#wilcox.test(values,mu=mu)
W <- function(values,normals){resultw <- try(wilcox.test(values,score(normals)), silent = TRUE)
if(class(resultw)=="try-error") return(list("statistic"=NA,"parameter"=NA,"p.value"=NA,"null.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultw}
KS <- function(values,normals){resultks <- try(ks.test(values,score(normals)), silent = TRUE)
if(class(resultks)=="try-error") return(list("statistic"=NA,"p.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultks}
#resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE)
# if(class(resultks)=="try-error") NA) else resultks
cnvs[i,numberOfCol+1]=W(values,normals)$p.value
cnvs[i,numberOfCol+2]=KS(values,normals)$p.value
}
if (numberOfCol==5) {
names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==7) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==9) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
cnvs$Wfdr <- p.adjust(cnvs$WilcoxonRankSumTestPvalue, method="BH",n=nrow(cnvs))
cnvs$KSfdr <- p.adjust(cnvs$KolmogorovSmirnovPvalue, method="BH",n=nrow(cnvs))
cnvs<-subset(cnvs, Wfdr <= 0.05 & KSfdr <= 0.05)
samp<-gsub("_T.finalSorted.bam_CNVs","",aaa)
cnvs<-add_column(cnvs, Sample=samp, .before=1)
write.table(cnvs, file=paste(targDir,aaa,"CNV.p.filtered.txt",sep=""),sep="\t",quote=F,row.names=F)
it seems that the problem stands at the line 22:
normals <- setdiff(ratio.bed,cnvs.bed)
in this case, we want to find differences between two granges objects, but the function that does this operation is setdiff
of GenomicRanges, that is masked by the package base. The solution to the answer is to edit such line in this way:
normals <- GenomicRanges::setdiff(ratio.bed,cnvs.bed)
Thank you again for your effort!