r cluster-analysis extract seurat

Extract names of genes expressed by at least 10% of cells in a cluster


I have a Seurat object with defined clusters. I need to extract a list of all genes that are expressed by at least 10% of cells in my cluster. I need to repeat it for every cluster that I have, separately.

I know of some code that could potentially extract the genes expressed by at least 10% of cells across the whole Seurat object:

# Flag genes detected (count > 0) in at least 10% of all cells in the object.
# The cutoff is intentionally left unrounded: wrapping it in floor() lowers the
# threshold below 10% whenever the number of cells is not a multiple of 10, and
# turns it into 0 (so every gene passes) when there are fewer than 10 cells.
genes.to.keep <- Matrix::rowSums(Monocyte.integrated@assays$RNA@counts > 0) >=
  0.1 * ncol(Monocyte.integrated@assays$RNA@counts)
# Keep only the flagged genes; drop = FALSE keeps the result a matrix even if
# a single gene passes the filter.
counts.sub <- Monocyte.integrated@assays$RNA@counts[
  genes.to.keep, , drop = FALSE
]

But this is not what I want, and I'm not sure how to modify it to work per cluster (assuming it is correct in the first place). I store the cluster names in a metadata column called "cluster_names".

I would appreciate any help

BW


Solution

  • You could use lapply to iterate over the factor levels of your clusters to subset and filter them individually and use setNames to name the resulting list. Below is a reproducible example:

    library(Seurat)

    # Build a small example object whose cluster labels sit in a metadata
    # column named "cluster_names", as described in the question.
    data("pbmc_small")
    pbmc_small <- FindClusters(pbmc_small, resolution = 1)
    meta_cols <- names(pbmc_small@meta.data)
    meta_cols[meta_cols == "seurat_clusters"] <- "cluster_names"
    names(pbmc_small@meta.data) <- meta_cols

    # Relabel the factor levels as cluster_1, cluster_2, ...
    n_clusters <- length(levels(pbmc_small$cluster_names))
    levels(pbmc_small$cluster_names) <- paste0("cluster_", seq_len(n_clusters))
    cluster_ids <- levels(pbmc_small$cluster_names)

    # Names of the genes with a non-zero count in at least 10% of the cells
    # belonging to cluster `cl`.
    expressed_in_cluster <- function(cl) {
      cells <- subset(pbmc_small, cluster_names == cl)
      n_expressing <- Matrix::rowSums(cells@assays$RNA@counts > 0)
      rownames(cells)[n_expressing >= 0.1 * ncol(cells)]
    }

    # One character vector of gene names per cluster, named by cluster.
    setNames(lapply(cluster_ids, expressed_in_cluster), cluster_ids)
    #> $cluster_1
    #>   [1] "CD79B"     "HLA-DRA"   "LTB"       "SP100"     "PPP3CC"    "CXCR4"    
    #>   [7] "STX10"     "SNHG7"     "CD3D"      "NOSIP"     "SAFB2"     "CD2"      
    #>  [13] "IL7R"      "PIK3IP1"   "MPHOSPH6"  "KHDRBS1"   "MAL"       "CCR7"     
    #>  [19] "THYN1"     "TAF7"      "LDHB"      "TMEM123"   "EPC1"      "EIF4A2"   
    #>  [25] "CD3E"      "TMUB1"     "BLOC1S4"   "SRSF7"     "ACAP1"     "TNFAIP8"  
    #>  [31] "CD7"       "TAGAP"     "DNAJB1"    "ASNSD1"    "S1PR4"     "CTSW"     
    #>  [37] "GZMK"      "NKG7"      "IL32"      "DNAJC2"    "LYAR"      "CST7"     
    #>  [43] "LCK"       "CCL5"      "HNRNPH1"   "SSR2"      "GIMAP1"    "MMADHC"   
    #>  [49] "CD8A"      "GYPC"      "HNRNPF"    "RPL7L1"    "KLRG1"     "CRBN"     
    #>  [55] "SATB1"     "PMPCB"     "NRBP1"     "TCF7"      "HNRNPA3"   "S100A8"   
    #>  [61] "S100A9"    "LYZ"       "FCN1"      "TYROBP"    "NFKBIA"    "TYMP"     
    #>  [67] "CTSS"      "TSPO"      "CTSB"      "LGALS1"    "BLVRA"     "LGALS3"   
    #>  [73] "IFI6"      "HLA-DPA1"  "CST3"      "GSTP1"     "EIF3G"     "VPS28"    
    #>  [79] "ZFP36L1"   "ANXA2"     "HSP90AA1"  "LST1"      "AIF1"      "PSAP"     
    #>  [85] "YWHAB"     "MYO1G"     "SAT1"      "RGS2"      "FCGR3A"    "S100A11"  
    #>  [91] "FCER1G"    "IFITM2"    "COTL1"     "LGALS9"    "CD68"      "RHOC"     
    #>  [97] "CARD16"    "COPS6"     "PPBP"      "GPX1"      "TPM4"      "PF4"      
    #> [103] "SDPR"      "NRGN"      "SPARC"     "GNG11"     "CLU"       "HIST1H2AC"
    #> [109] "NCOA4"     "GP9"       "FERMT3"    "ODC1"      "CD9"       "RUFY1"    
    #> [115] "TUBB1"     "TALDO1"    "TREML1"    "NGFRAP1"   "PGRMC1"    "CA2"      
    #> [121] "ITGA2B"    "MYL9"      "TMEM40"    "PARVB"     "PTCRA"     "ACRBP"    
    #> [127] "TSC22D1"   "VDAC3"     "GZMB"      "GZMA"      "GNLY"      "FGFBP2"   
    #> [133] "AKR1C3"    "CCL4"      "PRF1"      "GZMH"      "XBP1"      "GZMM"     
    #> [139] "PTGDR"     "IGFBP7"    "TTC38"     "KLRD1"     "ARHGDIA"   "IL2RB"    
    #> [145] "CLIC3"     "PPP1R18"   "CD247"     "ALOX5AP"   "XCL2"      "C12orf75" 
    #> [151] "RARRES3"   "PCMT1"     "LAMP1"     "SPON2"    
    #> 
    #> $cluster_2
    #>   [1] "CD79B"         "CD79A"         "HLA-DRA"       "HLA-DQB1"     
    #>   [5] "HVCN1"         "HLA-DMB"       "LTB"           "SP100"        
    #>   [9] "NCF1"          "EAF2"          "FAM96A"        "CXCR4"        
    #>  [13] "STX10"         "SNHG7"         "NT5C"          "NOSIP"        
    #>  [17] "IL7R"          "KHDRBS1"       "TAF7"          "LDHB"         
    #>  [21] "TMEM123"       "EIF4A2"        "TMUB1"         "BLOC1S4"      
    #>  [25] "SRSF7"         "TNFAIP8"       "TAGAP"         "DNAJB1"       
    #>  [29] "S1PR4"         "NKG7"          "IL32"          "DNAJC2"       
    #>  [33] "LYAR"          "CCL5"          "SSR2"          "GIMAP1"       
    #>  [37] "MMADHC"        "HNRNPF"        "RPL7L1"        "HNRNPA3"      
    #>  [41] "S100A8"        "S100A9"        "LYZ"           "CD14"         
    #>  [45] "FCN1"          "TYROBP"        "ASGR1"         "NFKBIA"       
    #>  [49] "TYMP"          "CTSS"          "TSPO"          "RBP7"         
    #>  [53] "CTSB"          "LGALS1"        "FPR1"          "VSTM1"        
    #>  [57] "BLVRA"         "MPEG1"         "BID"           "SMCO4"        
    #>  [61] "CFD"           "LINC00936"     "LGALS2"        "MS4A6A"       
    #>  [65] "FCGRT"         "LGALS3"        "NUP214"        "SCO2"         
    #>  [69] "IL17RA"        "IFI6"          "HLA-DPA1"      "FCER1A"       
    #>  [73] "CLEC10A"       "HLA-DMA"       "RGS1"          "HLA-DPB1"     
    #>  [77] "HLA-DQA1"      "RNF130"        "HLA-DRB5"      "HLA-DRB1"     
    #>  [81] "CST3"          "IL1B"          "POP7"          "HLA-DQA2"     
    #>  [85] "GSTP1"         "EIF3G"         "VPS28"         "LY86"         
    #>  [89] "ZFP36L1"       "ANXA2"         "GRN"           "CFP"          
    #>  [93] "HSP90AA1"      "LST1"          "AIF1"          "PSAP"         
    #>  [97] "YWHAB"         "MYO1G"         "SAT1"          "RGS2"         
    #> [101] "SERPINA1"      "IFITM3"        "FCGR3A"        "LILRA3"       
    #> [105] "S100A11"       "FCER1G"        "TNFRSF1B"      "IFITM2"       
    #> [109] "WARS"          "IFI30"         "MS4A7"         "C5AR1"        
    #> [113] "HCK"           "COTL1"         "LGALS9"        "CD68"         
    #> [117] "RP11-290F20.3" "RHOC"          "CARD16"        "LRRC25"       
    #> [121] "COPS6"         "ADAR"          "GPX1"          "TPM4"         
    #> [125] "NRGN"          "NCOA4"         "FERMT3"        "ODC1"         
    #> [129] "TALDO1"        "PARVB"         "VDAC3"         "GZMB"         
    #> [133] "XBP1"          "IGFBP7"        "ARHGDIA"       "PPP1R18"      
    #> [137] "ALOX5AP"       "RARRES3"       "PCMT1"         "SPON2"        
    #> 
    #> $cluster_3
    #>   [1] "MS4A1"         "CD79B"         "CD79A"         "HLA-DRA"      
    #>   [5] "TCL1A"         "HLA-DQB1"      "HVCN1"         "HLA-DMB"      
    #>   [9] "LTB"           "LINC00926"     "FCER2"         "SP100"        
    #>  [13] "NCF1"          "PPP3CC"        "EAF2"          "PPAPDC1B"     
    #>  [17] "CD19"          "KIAA0125"      "CYB561A3"      "CD180"        
    #>  [21] "RP11-693J15.5" "FAM96A"        "CXCR4"         "STX10"        
    #>  [25] "SNHG7"         "NT5C"          "BANK1"         "IGLL5"        
    #>  [29] "CD200"         "FCRLA"         "CD3D"          "NOSIP"        
    #>  [33] "CD2"           "IL7R"          "PIK3IP1"       "KHDRBS1"      
    #>  [37] "THYN1"         "TAF7"          "LDHB"          "TMEM123"      
    #>  [41] "CCDC104"       "EPC1"          "EIF4A2"        "CD3E"         
    #>  [45] "SRSF7"         "ACAP1"         "TNFAIP8"       "CD7"          
    #>  [49] "TAGAP"         "DNAJB1"        "S1PR4"         "CTSW"         
    #>  [53] "GZMK"          "NKG7"          "IL32"          "DNAJC2"       
    #>  [57] "LYAR"          "CST7"          "LCK"           "CCL5"         
    #>  [61] "HNRNPH1"       "SSR2"          "GIMAP1"        "MMADHC"       
    #>  [65] "CD8A"          "PTPN22"        "GYPC"          "HNRNPF"       
    #>  [69] "RPL7L1"        "CRBN"          "SATB1"         "SIT1"         
    #>  [73] "PMPCB"         "NRBP1"         "TCF7"          "HNRNPA3"      
    #>  [77] "S100A9"        "LYZ"           "FCN1"          "TYROBP"       
    #>  [81] "NFKBIA"        "TYMP"          "CTSS"          "TSPO"         
    #>  [85] "CTSB"          "LGALS1"        "BLVRA"         "MPEG1"        
    #>  [89] "BID"           "CFD"           "LINC00936"     "LGALS2"       
    #>  [93] "MS4A6A"        "FCGRT"         "LGALS3"        "SCO2"         
    #>  [97] "HLA-DPA1"      "FCER1A"        "CLEC10A"       "HLA-DMA"      
    #> [101] "RGS1"          "HLA-DPB1"      "HLA-DQA1"      "RNF130"       
    #> [105] "HLA-DRB5"      "HLA-DRB1"      "CST3"          "IL1B"         
    #> [109] "POP7"          "HLA-DQA2"      "CD1C"          "GSTP1"        
    #> [113] "EIF3G"         "VPS28"         "LY86"          "ZFP36L1"      
    #> [117] "ZNF330"        "ANXA2"         "GRN"           "CFP"          
    #> [121] "HSP90AA1"      "FUOM"          "LST1"          "AIF1"         
    #> [125] "PSAP"          "YWHAB"         "MYO1G"         "SAT1"         
    #> [129] "RGS2"          "SERPINA1"      "IFITM3"        "FCGR3A"       
    #> [133] "S100A11"       "FCER1G"        "TNFRSF1B"      "IFITM2"       
    #> [137] "WARS"          "IFI30"         "MS4A7"         "HCK"          
    #> [141] "COTL1"         "LGALS9"        "CD68"          "RHOC"         
    #> [145] "CARD16"        "LRRC25"        "COPS6"         "ADAR"         
    #> [149] "GPX1"          "TPM4"          "NCOA4"         "FERMT3"       
    #> [153] "ODC1"          "RUFY1"         "TALDO1"        "VDAC3"        
    #> [157] "GZMA"          "GNLY"          "FGFBP2"        "PRF1"         
    #> [161] "XBP1"          "GZMM"          "PTGDR"         "ARHGDIA"      
    #> [165] "PPP1R18"       "CD247"         "ALOX5AP"       "XCL2"         
    #> [169] "C12orf75"      "RARRES3"       "PCMT1"         "SPON2"
    

    Created on 2021-03-26 by the reprex package (v1.0.0)