Commit d2766d6e authored by smorabit's avatar smorabit
Browse files

hub gene expression heatmap plot

parent 23890a5c
Loading
Loading
Loading
Loading
+141 −4
Original line number Diff line number Diff line
@@ -413,9 +413,6 @@ ModuleFeaturePlot<- function(
}





#' EnrichrBarPlot
#'
#' Makes barplots from Enrichr data
@@ -760,7 +757,7 @@ HubGeneNetworkPlot <- function(

  # sample the same number of genes in each module
  other_genes <- modules %>%
    subset(!(gene_list %in% unlist(hub_list))) %>%
    subset(!(gene_name %in% unlist(hub_list))) %>%
    group_by(module) %>%
    sample_n(n_other, replace=TRUE) %>%
    .$gene_name %>% unique
@@ -1145,3 +1142,143 @@ MotifOverlapBarPlot <- function(
  }

}



#' Plots gene expression of hub genes as a heatmap
#'
#' This function makes an expression heatmap of the top n hub genes per module
#' using Seurat's DoHeatmap, and then assembles them all into one big heatmap.
#'
#'
#'
#' @param seurat_obj A Seurat object
#' @param wgcna_name
#' @keywords scRNA-seq
#' @export
#' @examples
#' DoHubGeneHeatmap
DoHubGeneHeatmap <- function(
  seurat_obj,
  n_hubs = 10,
  n_cells = 200,
  group.by=NULL,
  module_names = NULL,
  combine=TRUE, #returns a list of individual heatmaps if FALSE
  wgcna_name=NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # use idents as grouping variable if not specified
  if(is.null(group.by)){
    group.by <- 'temp_ident'
    seurat_obj$temp_ident <- Idents(seurat_obj)
  }

  # get modules
  modules <- GetModules(seurat_obj)
  modules <- modules %>% subset(module != 'grey') %>% mutate(module = droplevels(module))
  mods <- levels(modules$module)

  if(!is.null(module_names)){
    print('here')
    mods <- module_names
    modules <- modules %>% subset(module %in% mods)
  }

  # get table of module names & colors
  mod_colors <- modules %>% dplyr::select(c(module, color)) %>% distinct

  # get hub genes:
  hub_list <- lapply(mods, function(cur_mod){
    cur <- subset(modules, module == cur_mod)
    cur[,c('gene_name', paste0('kME_', cur_mod))] %>%
      top_n(n_hubs) %>% .$gene_name
  })
  names(hub_list) <- mods

  seurat_obj$barcode <- colnames(seurat_obj)
  temp <- table(seurat_obj@meta.data[[group.by]])

  # sample cells
  df <- data.frame()
  for(i in 1:length(temp)){

    if(temp[[i]] < n_cells){
      cur_df <- seurat_obj@meta.data %>% subset(get(group.by) == names(temp)[i])
    } else{
      cur_df <- seurat_obj@meta.data %>% subset(get(group.by) == names(temp)[i]) %>% sample_n(n_cells);
    }
    df <- rbind(df, cur_df)
  }

  # make sampled seurat obj for plotting:
  seurat_plot <- seurat_obj %>% subset(barcode %in% df$barcode)

  plot_list <- list()
  for(i in 1:length(hub_list)){

    print(i)
    cur_mod <- names(hub_list)[i]
    print(i)
    print(hub_list[[i]])
    print(i)

    if(i == 1){
      plot_list[[i]] <- DoHeatmap(
        seurat_plot,
        features = hub_list[[i]],
        group.by=group.by,
        raster=TRUE, slot='scale.data',
        disp.min = -2.5, disp.max=2.5,
        label=FALSE, group.bar=FALSE
      )
    } else{
      plot_list[[i]] <- DoHeatmap(
       seurat_plot,
       features=hub_list[[i]],
       group.by=group.by,
       raster=TRUE, slot='scale.data',
       group.bar.height=0,
       label=FALSE, group.bar=FALSE,
       disp.min = -2.5, disp.max=2.5
     ) + NoLegend()
    }
    print(i)
    # margin:
    plot_list[[i]] <- plot_list[[i]] +
      theme(
        plot.margin = margin(0,0,0,0),
        axis.text.y = element_text(face='italic')
      )  + scale_y_discrete(position = "right")
    print(i)

  }

  # module colorbar
  mod_colors$value <- n_hubs
  mod_colors$dummy <- 'colorbar'
  cbar_list <- list()
  for(i in 1:nrow(mod_colors)){
    cbar_list[[i]] <- mod_colors[i,] %>% ggplot(aes(y=value, x=dummy)) +
      geom_bar(position='stack', stat='identity', fill=mod_colors[i,]$color) +
      umap_theme + theme(
        plot.margin=margin(0,0,0,0)
      )
  }
  p_cbar <- wrap_plots(cbar_list, ncol=1)

  n_total_cells <- ncol(seurat_plot)
  width_cbar <- n_total_cells / 50

  if(combine){
    out <- wrap_plots(plot_list, ncol=1)  + plot_layout(guides='collect')
    out <- (p_cbar | out) + plot_layout(widths=c(width_cbar, n_total_cells))
  } else{
    out <- plot_list
  }

  out

}
+92 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ library(corrplot)
library(enrichR)
library(GeneOverlap)
library(WGCNA)
library(extrafont)
enableWGCNAThreads(nThreads = 8)

set.seed(2021)
@@ -878,6 +879,97 @@ test <- future_lapply(1:100, function(i){

```

Test Hub gene expression heatmap:

```{r eval=FALSE}

# TODO: Add Top Colorbar for cell-types 

# run heatmap function
cur_seurat$annotation <- droplevels(cur_seurat$annotation)
p <- DoHubGeneHeatmap(
  cur_seurat,
  group.by = 'annotation',
  n_hubs=10,
  n_cells=200,
  module_names = c('blue', 'magenta', 'purple', 'turquoise', 'black', 'red')
)

pdf(paste0(fig_dir, 'test_hubgene_heatmap.pdf'), width=10, height=10, useDingbats=FALSE)
p
dev.off()

```



Hub gene network plot using UMAP embedding, similar to Pando

```{r eval=FALSE}
# get the TOM
TOM <- GetTOM(cur_seurat)

# get modules, MEs:
MEs <- GetMEs(cur_seurat)
modules <- GetModules(cur_seurat)
mods <- levels(modules$module)
mods <- mods[mods != 'grey']

n_hubs <- 50
n_other <- 25

# get hub genes:
hub_list <- lapply(mods, function(cur_mod){
  cur <- subset(modules, module == cur_mod)
  cur[,c('gene_name', paste0('kME_', cur_mod))] %>%
    top_n(n_hubs) %>% .$gene_name
})
names(hub_list) <- mods

# sample the same number of genes in each module
other_genes <- modules %>%
  subset(!(gene_name %in% unlist(hub_list))) %>%
  group_by(module) %>%
  sample_n(n_other, replace=TRUE) %>%
  .$gene_name %>% unique

# subset TOM by these genes:
gene_list <- c(as.character(unlist(hub_list)), other_genes)
cur_TOM <- TOM[gene_list,gene_list]

# run UMAP
hub_umap <-  uwot::umap(cur_TOM)

# set up plotting df
plot_df <- as.data.frame(hub_umap) %>%
  rename(V1='UMAP1', V2='UMAP2') %>%
  mutate(gene=rownames(cur_TOM))

# add module color, and hub gene status to the plotting df:

ix <- match(plot_df$gene, modules$gene_name)
plot_df$module <- modules$module[ix]
plot_df$color <- modules$color[ix]
plot_df$hub <- ifelse(
  plot_df$gene %in% as.character(unlist(hub_list)), 'hub', 'other'
)
plot_df$size <- ifelse(
  plot_df$hub == 'hub', 2,1
)


p <- ggplot(plot_df, aes(x=UMAP1, y=UMAP2)) +
     geom_point(color=plot_df$color, size=plot_df$size) +
     umap_theme

pdf(paste0(fig_dir, 'test_hubgene_umap.pdf'), width=6, height=6, useDingbats=FALSE)
p
dev.off()

```




Project onto a mouse visium dataset (one of our new ones?)