I have a Seurat object with defined clusters. I need to extract a list of all genes that are expressed by at least 10% of cells in my cluster. I need to repeat it for every cluster that I have, separately.
I know of one snippet that should extract the genes expressed by at least 10% of cells across the whole Seurat object:
# Flag genes with a non-zero count in at least 10% of all cells in the object.
# The cut-off is intentionally not rounded down: floor(0.1 * n) lets through
# genes seen in fewer than 10% of cells and, when n < 10, becomes 0 so that
# every gene passes, including genes that are never detected.
genes.to.keep <- Matrix::rowSums(Monocyte.integrated@assays$RNA@counts > 0) >=
  0.1 * ncol(Monocyte.integrated@assays$RNA@counts)
# drop = FALSE keeps the result two-dimensional even if only one gene passes.
counts.sub <- Monocyte.integrated@assays$RNA@counts[genes.to.keep, , drop = FALSE]
But this is not what I want, and I'm not sure how to modify it to work per cluster (assuming the code is correct in the first place). I store the cluster names in the metadata variable called "cluster_names".
I would appreciate any help
BW
You could use `lapply` to iterate over the factor levels of your clusters, subsetting and filtering each cluster individually, and then use `setNames` to name the resulting list. Below is a reproducible example:
library(Seurat)

# Load the pbmc_small example dataset and cluster it.
data("pbmc_small")
pbmc_small <- FindClusters(pbmc_small, resolution = 1)

# Reproduce the question's setup: rename the "seurat_clusters" metadata column
# to "cluster_names" and relabel its levels as cluster_1, cluster_2, ...
is_cluster_col <- names(pbmc_small@meta.data) == "seurat_clusters"
names(pbmc_small@meta.data)[is_cluster_col] <- "cluster_names"
levels(pbmc_small$cluster_names) <- paste0(
  "cluster_",
  seq_len(nlevels(pbmc_small$cluster_names))
)

# For every cluster level, keep only that cluster's cells and collect the genes
# that have a non-zero count in at least 10% of those cells.
cluster_ids <- levels(pbmc_small$cluster_names)
genes_by_cluster <- lapply(cluster_ids, function(cluster_id) {
  cluster_cells <- subset(pbmc_small, cluster_names == cluster_id)
  n_expressing <- Matrix::rowSums(cluster_cells@assays$RNA@counts > 0)
  rownames(cluster_cells)[n_expressing >= 0.1 * ncol(cluster_cells)]
})

# Name each gene vector after its cluster; left unassigned so the list prints.
setNames(genes_by_cluster, cluster_ids)
#> $cluster_1
#> [1] "CD79B" "HLA-DRA" "LTB" "SP100" "PPP3CC" "CXCR4"
#> [7] "STX10" "SNHG7" "CD3D" "NOSIP" "SAFB2" "CD2"
#> [13] "IL7R" "PIK3IP1" "MPHOSPH6" "KHDRBS1" "MAL" "CCR7"
#> [19] "THYN1" "TAF7" "LDHB" "TMEM123" "EPC1" "EIF4A2"
#> [25] "CD3E" "TMUB1" "BLOC1S4" "SRSF7" "ACAP1" "TNFAIP8"
#> [31] "CD7" "TAGAP" "DNAJB1" "ASNSD1" "S1PR4" "CTSW"
#> [37] "GZMK" "NKG7" "IL32" "DNAJC2" "LYAR" "CST7"
#> [43] "LCK" "CCL5" "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [49] "CD8A" "GYPC" "HNRNPF" "RPL7L1" "KLRG1" "CRBN"
#> [55] "SATB1" "PMPCB" "NRBP1" "TCF7" "HNRNPA3" "S100A8"
#> [61] "S100A9" "LYZ" "FCN1" "TYROBP" "NFKBIA" "TYMP"
#> [67] "CTSS" "TSPO" "CTSB" "LGALS1" "BLVRA" "LGALS3"
#> [73] "IFI6" "HLA-DPA1" "CST3" "GSTP1" "EIF3G" "VPS28"
#> [79] "ZFP36L1" "ANXA2" "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [85] "YWHAB" "MYO1G" "SAT1" "RGS2" "FCGR3A" "S100A11"
#> [91] "FCER1G" "IFITM2" "COTL1" "LGALS9" "CD68" "RHOC"
#> [97] "CARD16" "COPS6" "PPBP" "GPX1" "TPM4" "PF4"
#> [103] "SDPR" "NRGN" "SPARC" "GNG11" "CLU" "HIST1H2AC"
#> [109] "NCOA4" "GP9" "FERMT3" "ODC1" "CD9" "RUFY1"
#> [115] "TUBB1" "TALDO1" "TREML1" "NGFRAP1" "PGRMC1" "CA2"
#> [121] "ITGA2B" "MYL9" "TMEM40" "PARVB" "PTCRA" "ACRBP"
#> [127] "TSC22D1" "VDAC3" "GZMB" "GZMA" "GNLY" "FGFBP2"
#> [133] "AKR1C3" "CCL4" "PRF1" "GZMH" "XBP1" "GZMM"
#> [139] "PTGDR" "IGFBP7" "TTC38" "KLRD1" "ARHGDIA" "IL2RB"
#> [145] "CLIC3" "PPP1R18" "CD247" "ALOX5AP" "XCL2" "C12orf75"
#> [151] "RARRES3" "PCMT1" "LAMP1" "SPON2"
#>
#> $cluster_2
#> [1] "CD79B" "CD79A" "HLA-DRA" "HLA-DQB1"
#> [5] "HVCN1" "HLA-DMB" "LTB" "SP100"
#> [9] "NCF1" "EAF2" "FAM96A" "CXCR4"
#> [13] "STX10" "SNHG7" "NT5C" "NOSIP"
#> [17] "IL7R" "KHDRBS1" "TAF7" "LDHB"
#> [21] "TMEM123" "EIF4A2" "TMUB1" "BLOC1S4"
#> [25] "SRSF7" "TNFAIP8" "TAGAP" "DNAJB1"
#> [29] "S1PR4" "NKG7" "IL32" "DNAJC2"
#> [33] "LYAR" "CCL5" "SSR2" "GIMAP1"
#> [37] "MMADHC" "HNRNPF" "RPL7L1" "HNRNPA3"
#> [41] "S100A8" "S100A9" "LYZ" "CD14"
#> [45] "FCN1" "TYROBP" "ASGR1" "NFKBIA"
#> [49] "TYMP" "CTSS" "TSPO" "RBP7"
#> [53] "CTSB" "LGALS1" "FPR1" "VSTM1"
#> [57] "BLVRA" "MPEG1" "BID" "SMCO4"
#> [61] "CFD" "LINC00936" "LGALS2" "MS4A6A"
#> [65] "FCGRT" "LGALS3" "NUP214" "SCO2"
#> [69] "IL17RA" "IFI6" "HLA-DPA1" "FCER1A"
#> [73] "CLEC10A" "HLA-DMA" "RGS1" "HLA-DPB1"
#> [77] "HLA-DQA1" "RNF130" "HLA-DRB5" "HLA-DRB1"
#> [81] "CST3" "IL1B" "POP7" "HLA-DQA2"
#> [85] "GSTP1" "EIF3G" "VPS28" "LY86"
#> [89] "ZFP36L1" "ANXA2" "GRN" "CFP"
#> [93] "HSP90AA1" "LST1" "AIF1" "PSAP"
#> [97] "YWHAB" "MYO1G" "SAT1" "RGS2"
#> [101] "SERPINA1" "IFITM3" "FCGR3A" "LILRA3"
#> [105] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [109] "WARS" "IFI30" "MS4A7" "C5AR1"
#> [113] "HCK" "COTL1" "LGALS9" "CD68"
#> [117] "RP11-290F20.3" "RHOC" "CARD16" "LRRC25"
#> [121] "COPS6" "ADAR" "GPX1" "TPM4"
#> [125] "NRGN" "NCOA4" "FERMT3" "ODC1"
#> [129] "TALDO1" "PARVB" "VDAC3" "GZMB"
#> [133] "XBP1" "IGFBP7" "ARHGDIA" "PPP1R18"
#> [137] "ALOX5AP" "RARRES3" "PCMT1" "SPON2"
#>
#> $cluster_3
#> [1] "MS4A1" "CD79B" "CD79A" "HLA-DRA"
#> [5] "TCL1A" "HLA-DQB1" "HVCN1" "HLA-DMB"
#> [9] "LTB" "LINC00926" "FCER2" "SP100"
#> [13] "NCF1" "PPP3CC" "EAF2" "PPAPDC1B"
#> [17] "CD19" "KIAA0125" "CYB561A3" "CD180"
#> [21] "RP11-693J15.5" "FAM96A" "CXCR4" "STX10"
#> [25] "SNHG7" "NT5C" "BANK1" "IGLL5"
#> [29] "CD200" "FCRLA" "CD3D" "NOSIP"
#> [33] "CD2" "IL7R" "PIK3IP1" "KHDRBS1"
#> [37] "THYN1" "TAF7" "LDHB" "TMEM123"
#> [41] "CCDC104" "EPC1" "EIF4A2" "CD3E"
#> [45] "SRSF7" "ACAP1" "TNFAIP8" "CD7"
#> [49] "TAGAP" "DNAJB1" "S1PR4" "CTSW"
#> [53] "GZMK" "NKG7" "IL32" "DNAJC2"
#> [57] "LYAR" "CST7" "LCK" "CCL5"
#> [61] "HNRNPH1" "SSR2" "GIMAP1" "MMADHC"
#> [65] "CD8A" "PTPN22" "GYPC" "HNRNPF"
#> [69] "RPL7L1" "CRBN" "SATB1" "SIT1"
#> [73] "PMPCB" "NRBP1" "TCF7" "HNRNPA3"
#> [77] "S100A9" "LYZ" "FCN1" "TYROBP"
#> [81] "NFKBIA" "TYMP" "CTSS" "TSPO"
#> [85] "CTSB" "LGALS1" "BLVRA" "MPEG1"
#> [89] "BID" "CFD" "LINC00936" "LGALS2"
#> [93] "MS4A6A" "FCGRT" "LGALS3" "SCO2"
#> [97] "HLA-DPA1" "FCER1A" "CLEC10A" "HLA-DMA"
#> [101] "RGS1" "HLA-DPB1" "HLA-DQA1" "RNF130"
#> [105] "HLA-DRB5" "HLA-DRB1" "CST3" "IL1B"
#> [109] "POP7" "HLA-DQA2" "CD1C" "GSTP1"
#> [113] "EIF3G" "VPS28" "LY86" "ZFP36L1"
#> [117] "ZNF330" "ANXA2" "GRN" "CFP"
#> [121] "HSP90AA1" "FUOM" "LST1" "AIF1"
#> [125] "PSAP" "YWHAB" "MYO1G" "SAT1"
#> [129] "RGS2" "SERPINA1" "IFITM3" "FCGR3A"
#> [133] "S100A11" "FCER1G" "TNFRSF1B" "IFITM2"
#> [137] "WARS" "IFI30" "MS4A7" "HCK"
#> [141] "COTL1" "LGALS9" "CD68" "RHOC"
#> [145] "CARD16" "LRRC25" "COPS6" "ADAR"
#> [149] "GPX1" "TPM4" "NCOA4" "FERMT3"
#> [153] "ODC1" "RUFY1" "TALDO1" "VDAC3"
#> [157] "GZMA" "GNLY" "FGFBP2" "PRF1"
#> [161] "XBP1" "GZMM" "PTGDR" "ARHGDIA"
#> [165] "PPP1R18" "CD247" "ALOX5AP" "XCL2"
#> [169] "C12orf75" "RARRES3" "PCMT1" "SPON2"
Created on 2021-03-26 by the reprex package (v1.0.0)