I have a dataframe in R:
dput(trans_eqtl[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000265688", "ENSG00000265688", "ENSG00000265688"), `Gene-Chr` = c(11,
11, 17, 17, 17), `Gene-Pos` = c(133980358, 133980358, 79887167,
79887167, 79887167), RsId = c("rs3811444", "rs35340377", "rs705705",
"rs1131017", "rs10876864"), `SNP-Chr` = c(1, 1, 12, 12, 12),
`SNP-Pos` = c(248039451, 248038210, 56435504, 56435929, 56401085
)), row.names = c(NA, 5L), class = "data.frame")
I want to check, for each row, whether the gene (`Gene-Chr`) and the SNP (`SNP-Chr`) are on the same chromosome or on different chromosomes. I wrote code that loops over each row, checks these two conditions, and uses `rbind()` inside the loop to collect the matching rows into two separate variables, `intra` and `inter`. However, `rbind()` keeps adding the same row to the final variables. This is the code for reference:
## Split trans_eqtl into inter-chromosomal rows (gene and SNP on different
## chromosomes) and intra-chromosomal rows (gene and SNP on the same one).
## NOTE(review): this is the code as posted in the question; it does NOT
## produce the intended result. The comments below explain why.
## inter:
inter <- NULL
intra <- NULL
# NOTE(review): the bound 240 is hard-coded; presumably it is
# nrow(trans_eqtl) -- confirm.
for(i in 1:240){
# Both operands are single values, so this is a scalar test of row i.
if(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i])){
# %in% binds tighter than ==, and == binds tighter than !, so this reads
# !((a %in% b) == TRUE). Inside this branch that is a single TRUE, and
# which(TRUE) is 1 -- so x is always 1, never i.
x <- which(!(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
# Because x is 1, this always selects the first row of trans_eqtl.
value <- trans_eqtl[x,]
}
# This rbind() is outside the if, so it runs on every iteration and appends
# whatever `value` currently holds, even when row i failed the test.
inter <- rbind(inter,value)
}
##check for intra:
for(i in 1:240){
if(trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]){
# Same issue as above: inside this branch the argument is a single TRUE,
# so y is always 1.
y <- which((trans_eqtl$`Gene-Chr`[i] %in% trans_eqtl$`SNP-Chr`[i]==TRUE))
value1 <- trans_eqtl[y,]
}
# Runs on every iteration regardless of the test. If no earlier iteration
# has assigned value1 (and it is not left over in the workspace), this line
# fails because value1 does not exist yet.
intra <- rbind(intra,value1)
}
My output variable looks like this:
dput(intra[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
dput(inter[1:5,1:6])
structure(list(Gene = c("ENSG00000166086", "ENSG00000166086",
"ENSG00000166086", "ENSG00000166086", "ENSG00000166086"), `Gene-Chr` = c(11,
11, 11, 11, 11), `Gene-Pos` = c(133980358, 133980358, 133980358,
133980358, 133980358), RsId = c("rs3811444", "rs3811444", "rs3811444",
"rs3811444", "rs3811444"), `SNP-Chr` = c(1, 1, 1, 1, 1), `SNP-Pos` = c(248039451,
248039451, 248039451, 248039451, 248039451)), row.names = c(NA,
5L), class = "data.frame")
My understanding is that `rbind()` should be inside the loop so that every row `i` matching the condition is appended to the final variable. Does anyone know how to solve this issue? Thank you.
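The loop repeats the same row for two reasons: the `rbind()` call sits outside the `if`, so it runs on every iteration, and `which()` applied to a single `TRUE` always returns 1, so the row that gets stored is always row 1 rather than row `i`. You do not need a loop at all, because the comparison can be done on the whole columns at once.
You could try a dplyr approach: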
library(dplyr)
# Inter-chromosomal pairs: rows where the gene and the SNP are on
# different chromosomes.
inter <- filter(trans_eqtl, `Gene-Chr` != `SNP-Chr`)
# Intra-chromosomal pairs: rows where the gene and the SNP share a
# chromosome.
intra <- filter(trans_eqtl, `Gene-Chr` == `SNP-Chr`)
output:
#inter:
Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
1 ENSG00000166086 11 133980358 rs3811444 1 248039451
2 ENSG00000166086 11 133980358 rs35340377 1 248038210
3 ENSG00000265688 17 79887167 rs705705 12 56435504
4 ENSG00000265688 17 79887167 rs1131017 12 56435929
5 ENSG00000265688 17 79887167 rs10876864 12 56401085
# intra:
[1] Gene Gene-Chr Gene-Pos RsId SNP-Chr SNP-Pos
<0 Zeilen> (oder row.names mit Länge 0)