r lapply complexheatmap

Plotting multiple files with ComplexHeatmap


I would like to use ComplexHeatmap to plot a separate heatmap for each of several files (one data frame per file).

So far I have been able to do this for a small subset of files.

Reading files as list

# Collect every .txt expression table in Model_hmap/ (full paths).
list_of_files <- list.files("Model_hmap/", pattern = "\\.txt$", full.names = TRUE)

# Read each table into a list of data frames. The first column supplies the
# row names and check.names = FALSE keeps the column headers exactly as they
# appear in the file.
# Alternative reader (readr):
# all_csv <- lapply(list_of_files, read_delim, delim = "\t", escape_double = FALSE, trim_ws = TRUE)
all_csv <- lapply(
  list_of_files,
  read.table,
  strip.white = FALSE,
  check.names = FALSE,
  header = TRUE,
  row.names = 1
)

# Column labels carried over from the original script; not used further in
# this snippet. Alternative label sets tried earlier:
# my_names <- c("gene", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "UP_DOWN")
# my_names <- c("Peak", "annotation", "ENSEMBL", "log2FoldChange", "padj", "UP_DOWN")
my_names <- c("Symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "UP_DOWN")

# Drop columns 1-7 and 155 from every table (presumably the non-sample
# statistics columns -- confirm against the input files).
result_abd <- lapply(all_csv, function(x) subset(x, select = -c(1:7, 155)))

# Name each element after the file it was actually read from. Deriving the
# names from list_of_files (instead of a second, unfiltered directory listing)
# guarantees that names and data stay in the same order and have the same
# length, and only a trailing ".txt" extension is stripped.
names(result_abd) <- sub("\\.txt$", "", basename(list_of_files))

Then scaling the data:

# Row-wise z-score of a numeric table.
#
# result_abd: a numeric matrix or data frame.
# Returns a numeric matrix with the same dimensions and dimnames in which
# every row has been centred and scaled (mean 0, sd 1 across its columns).
fun <- function(result_abd) {
  # scale() standardises columns, so transpose, scale, and transpose back.
  # as.matrix() keeps the input two-dimensional even when it has a single
  # column, so row names are never lost to vector dropping.
  t(scale(t(as.matrix(result_abd))))
}

# Apply the row-wise scaling to every table; the result is a list with one
# scaled matrix per element of result_abd (names are carried over).
p2 <- Map(fun, result_abd)

The next step is to read the metadata that I would like to use to annotate my heatmap.

My metadata looks like this:

dput(head(metadata))
structure(list(patient = c("TCGA-AB-2856", "TCGA-AB-2849", "TCGA-AB-2971", 
"TCGA-AB-2930", "TCGA-AB-2891", "TCGA-AB-2872"), prior_malignancy = c("no", 
"no", "no", "no", "no", "no"), FAB = c("M4", "M0", "M4", "M2", 
"M1", "M3"), Risk_Cyto = c("Intermediate", "Poor", "Intermediate", 
"Intermediate", "Poor", "Good")), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))

To read the above metadata I am doing the following; I'm not sure if it's the right approach.

# Collect every .txt metadata table in Model_hmap_meta/ (full paths).
list_of_files1 <- list.files("Model_hmap_meta/", pattern = "\\.txt$", full.names = TRUE)

# One data frame per metadata file: tab-separated, with a header line, and
# the first column used as row names.
meta1 <- lapply(
  list_of_files1,
  function(path) read.table(path, row.names = 1, sep = "\t", header = TRUE)
)

Now I'm stuck at this step. I'm not sure how to pass the metadata as a list, the way I did for the gene-expression data frames (the z-scores I calculated are stored in a list). I think the metadata should be of the same class if I am to use it this way.

For a single file, this is how I add the annotation to my final plot:

# Read the sample metadata for a single file (tab-delimited).
metadata <- read_delim(
  "Model_hmap_meta/FAB_table.txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)

# Quick interactive look at the table.
head(metadata)
dim(metadata)

# Annotation table: one row per metadata row, holding the two annotation
# columns used in the heatmap.
ann <- data.frame(
  FAB = metadata$FAB,
  Risk_Cyto = metadata$Risk_Cyto
)

# Fixed colour for every level of each annotation.
fab_colours <- c(
  "M0" = "red2",
  "M1" = "royalblue",
  "M2" = "gold",
  "M3" = "forestgreen",
  "M4" = "chocolate",
  "M5" = "Purple"
)
risk_colours <- c(
  "Good" = "limegreen",
  "Intermediate" = "navy",
  "N.D." = "magenta",
  "Poor" = "black"
)
colours <- list("FAB" = fab_colours, "Risk_Cyto" = risk_colours)

# Column annotation object that is later passed to Heatmap() as top_annotation.
colAnn <- HeatmapAnnotation(
  df = ann,
  which = "col",
  col = colours,
  annotation_width = unit(c(1, 4), "cm"),
  gap = unit(1, "mm")
)

If I understand correctly, this annotation is what I need to build for each element of the list, which I have not been able to do.

My plotting function.

This is the code I use to plot.

# Build the heatmap for a single matrix. `heat` is not defined in this
# snippet; it is assumed to be one scaled matrix (e.g. an element of p2) --
# confirm before running. Logical arguments are spelled TRUE/FALSE because
# T and F are ordinary variables that can be reassigned.
hm1 <- Heatmap(
  heat,
  # Diverging colour scale with breaks at -2.6, -1, 0, 1 and 2.6
  col = colorRamp2(
    c(-2.6, -1, 0, 1, 2.6),
    c("blue", "skyblue", "white", "lightcoral", "red")
  ),

  # heatmap_legend_param = list(at = c(-2.6, -1, 0, 1, 2.6), color_bar = "continuous",
  #                             legend_direction = "vertical", legend_width = unit(5, "cm"),
  #                             title_position = "topcenter",
  #                             title_gp = gpar(fontsize = 10, fontface = "bold")),
  name = "Z-score",

  # Row annotation configuration
  cluster_rows = TRUE,
  show_row_dend = FALSE,
  row_title_side = "right",
  row_title_gp = gpar(fontsize = 8),
  show_row_names = FALSE,
  row_names_side = "left",

  # Column annotation configuration
  cluster_columns = TRUE,
  show_column_dend = TRUE,
  column_title = "DE genes",
  column_title_side = "top",
  column_title_gp = gpar(fontsize = 15, fontface = "bold"),
  show_column_names = FALSE,
  column_names_gp = gpar(fontsize = 12, fontface = "bold"),

  # Dendrogram configuration: columns
  clustering_distance_columns = "euclidean",
  clustering_method_columns = "complete",
  column_dend_height = unit(10, "mm"),

  # Dendrogram configuration: rows
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "complete",
  row_dend_width = unit(4, "cm"),
  row_dend_side = "left",
  row_dend_reorder = TRUE,

  # Splits (a value of 1 means no k-means splitting)
  border = TRUE,
  row_km = 1,
  column_km = 1,

  # Plot size parameters (disabled)
  # width = unit(5, "inch"),
  # height = unit(4, "inch"),
  # height = unit(0.4, "cm") * nrow(mat),

  # Annotations
  top_annotation = colAnn
)

# Plot the heatmap with both legends on the right-hand side.
draw(hm1, annotation_legend_side = "right", heatmap_legend_side = "right")

Objective: How do I wrap all of the above into a small function that takes multiple files as input and plots each of them?

UPDATE Data files

My data files; my metadata file


Solution

  • Using the code you provided I made the following function (make_heatmap). Some of the read in statements are altered to match what I was working with on my machine. I also only used 2 of your files but it should work with all 4 that you're using.

    This function will allow you to pass the counts matrix (which you normalize and set up before passing to the function). The assumption is that you're using the same metadata/annotation for each file you're passing. If you have different annotation files you could set up the heatmap annotation before the function and then pass that to the function. This is a bit more tedious though.

    Usually the way that I set up my heatmap analyses is that I have a script containing all of my functions (one for each type of heatmap I have to make), and then every time I need to make a new heatmap I have another script where I read in and prepare (i.e. median-center) my counts matrix and then call the heatmap function I need.

    # Collect the input tables: every file in the working directory whose name
    # matches 'MAP'.
    list_of_files <- dir(pattern = 'MAP', full.names = TRUE)

    # Read each table into a list of data frames. The first column supplies the
    # row names and check.names = FALSE keeps the column headers as written.
    # Alternative reader (readr):
    # all_csv <- lapply(list_of_files, read_delim, delim = "\t", escape_double = FALSE, trim_ws = TRUE)
    all_csv <- lapply(
      list_of_files,
      read.table,
      strip.white = FALSE,
      check.names = FALSE,
      header = TRUE,
      row.names = 1
    )

    # Column labels carried over from the original script; not used further in
    # this snippet. Alternative label sets tried earlier:
    # my_names <- c("gene", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "UP_DOWN")
    # my_names <- c("Peak", "annotation", "ENSEMBL", "log2FoldChange", "padj", "UP_DOWN")
    my_names <- c("Symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj", "UP_DOWN")

    # Drop columns 1-7 and 155 from every table (presumably the non-sample
    # statistics columns -- confirm against the input files).
    result_abd <- lapply(all_csv, function(x) subset(x, select = -c(1:7, 155)))

    # Name each element after the file it was actually read from. Taking the
    # names from list_of_files (not from a listing of another directory) keeps
    # names and data the same length and in the same order.
    names(result_abd) <- sub("\\.txt$", "", basename(list_of_files))
    
    # Row-wise z-score of a numeric table.
    #
    # result_abd: a numeric matrix or data frame.
    # Returns a numeric matrix with the same dimensions and dimnames in which
    # every row has been centred and scaled (mean 0, sd 1 across its columns).
    fun <- function(result_abd) {
      # scale() standardises columns, so transpose, scale, and transpose back.
      # as.matrix() keeps the input two-dimensional even when it has a single
      # column, so row names are never lost to vector dropping.
      t(scale(t(as.matrix(result_abd))))
    }
    
    # Apply the row-wise scaling to every table; the result is a list with one
    # scaled matrix per element of result_abd (names are carried over).
    p2 <- Map(fun, result_abd)
    
    # list_of_files1 <- list.files('Model_hmap_meta/',pattern = '\\.txt$', full.names = TRUE)
    # #Further arguments to read.csv can be passed in ...
    # meta1 <- lapply(list_of_files1,read.table, row.names = 1,sep = "\t",header = TRUE)
    
    
    # Draw a z-score heatmap of one matrix with FAB and Risk_Cyto column
    # annotations read from a metadata table.
    #
    # Arguments:
    #   counts_matrix  Numeric matrix to plot (already scaled by the caller);
    #                  one column per sample.
    #   metadata_file  Path to the tab-delimited metadata table with a header
    #                  line. Must contain the columns FAB and Risk_Cyto.
    #                  Defaults to "FAB_table.txt".
    #   column_title   Title drawn above the heatmap. Defaults to "DE genes".
    #
    # Returns the value of draw(), i.e. the drawn heatmap object.
    # Stops with an error if the metadata lacks a required column or cannot be
    # matched to the columns of counts_matrix.
    make_heatmap <- function(counts_matrix,
                             metadata_file = "FAB_table.txt",
                             column_title = "DE genes") {

      metadata <- read.table(metadata_file, sep = "\t", header = TRUE)

      missing_cols <- setdiff(c("FAB", "Risk_Cyto"), names(metadata))
      if (length(missing_cols) > 0) {
        stop("Metadata is missing column(s): ",
             paste(missing_cols, collapse = ", "), call. = FALSE)
      }

      # The annotation is applied to matrix columns by position. When the
      # metadata has a `patient` column covering every matrix column name,
      # reorder/subset the metadata so each sample gets its own annotation.
      # Otherwise fall back to positional use, which needs matching lengths.
      sample_ids <- colnames(counts_matrix)
      can_match <- !is.null(sample_ids) &&
        "patient" %in% names(metadata) &&
        all(sample_ids %in% metadata$patient)
      if (can_match) {
        metadata <- metadata[match(sample_ids, metadata$patient), , drop = FALSE]
      } else if (nrow(metadata) != ncol(counts_matrix)) {
        stop("Metadata has ", nrow(metadata), " rows but the matrix has ",
             ncol(counts_matrix), " columns, and they could not be matched ",
             "by patient ID.", call. = FALSE)
      }

      ann <- data.frame(FAB = metadata$FAB, Risk_Cyto = metadata$Risk_Cyto)

      # Fixed colour for every level of each annotation.
      colours <- list(
        'FAB' = c('M0' = 'red2', 'M1' = 'royalblue', 'M2' = 'gold',
                  'M3' = 'forestgreen', 'M4' = 'chocolate', 'M5' = 'Purple'),
        'Risk_Cyto' = c('Good' = 'limegreen', 'Intermediate' = 'navy',
                        'N.D.' = 'magenta', 'Poor' = 'black')
      )

      colAnn <- HeatmapAnnotation(df = ann,
                                  which = 'col',
                                  col = colours,
                                  annotation_width = unit(c(1, 4), 'cm'),
                                  gap = unit(1, 'mm'))

      hm1 <- Heatmap(
        counts_matrix,
        # Diverging colour scale with breaks at -2.6, -1, 0, 1 and 2.6
        col = colorRamp2(c(-2.6, -1, 0, 1, 2.6),
                         c("blue", "skyblue", "white", "lightcoral", "red")),

        # heatmap_legend_param = list(at = c(-2.6, -1, 0, 1, 2.6), color_bar = "continuous",
        #                             legend_direction = "vertical", legend_width = unit(5, "cm"),
        #                             title_position = "topcenter",
        #                             title_gp = gpar(fontsize = 10, fontface = "bold")),
        name = "Z-score",

        # Row annotation configuration
        cluster_rows = TRUE,
        show_row_dend = FALSE,
        row_title_side = "right",
        row_title_gp = gpar(fontsize = 8),
        show_row_names = FALSE,
        row_names_side = "left",

        # Column annotation configuration
        cluster_columns = TRUE,
        show_column_dend = TRUE,
        column_title = column_title,
        column_title_side = "top",
        column_title_gp = gpar(fontsize = 15, fontface = "bold"),
        show_column_names = FALSE,
        column_names_gp = gpar(fontsize = 12, fontface = "bold"),

        # Dendrogram configuration: columns
        clustering_distance_columns = "euclidean",
        clustering_method_columns = "complete",
        column_dend_height = unit(10, "mm"),

        # Dendrogram configuration: rows
        clustering_distance_rows = "euclidean",
        clustering_method_rows = "complete",
        row_dend_width = unit(4, "cm"),
        row_dend_side = "left",
        row_dend_reorder = TRUE,

        # Splits (a value of 1 means no k-means splitting)
        border = TRUE,
        row_km = 1,
        column_km = 1,

        # Plot size parameters (disabled)
        # width = unit(5, "inch"),
        # height = unit(4, "inch"),
        # height = unit(0.4, "cm") * nrow(mat),

        # Annotations
        top_annotation = colAnn
      )

      # Plot the heatmap with both legends on the right-hand side.
      draw(hm1, annotation_legend_side = "right", heatmap_legend_side = "right")
    }
    
    # Plot the first two scaled tables, one heatmap each: just call the
    # function with the counts matrix.
    for (idx in seq_len(2)) {
      make_heatmap(as.matrix(p2[[idx]]))
    }
    

    If you need to output the heatmap to a pdf or something, you can do that before calling the function or you can put that command inside of the heatmap function (just make sure to call dev.off() inside the function too in that case).