Commit 0cad04fd authored by smorabit's avatar smorabit
Browse files

working on functions for Module UMAPs

parent 66694d53
Loading
Loading
Loading
Loading
+95 −0
Original line number Diff line number Diff line
@@ -1282,3 +1282,98 @@ OverlapModulesMotifs <- function(
  seruat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)

}


#' Run UMAP on co-expression matrix using hub genes as features.
#'
#' @param seurat_obj A Seurat object
#' @param n_hubs
#' @param exclude_grey
#' @param wgcna_name
#' @keywords scRNA-seq
#' @export
#' @examples
#' RunModuleUMAP
RunModuleUMAP <- function(
  seurat_obj,
  n_hubs = 50,
  exclude_grey = TRUE,
  harmonized=TRUE,
  wgcna_name = NULL,
  n_neighbors= 25,
  metric = "cosine",
  spread=1,
  min_dist=0.4,
  ...
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get the TOM
  TOM <- GetTOM(seurat_obj, wgcna_name)

  # get modules, MEs:
  MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
  modules <- GetModules(seurat_obj, wgcna_name)
  mods <- levels(modules$module)

  if(exclude_grey){
    mods <- mods[mods != 'grey']
  }

  # get hub genes:
  hub_list <- lapply(mods, function(cur_mod){
    cur <- subset(modules, module == cur_mod)
    cur[,c('gene_name', paste0('kME_', cur_mod))] %>%
      top_n(n_hubs) %>% .$gene_name
  })
  names(hub_list) <- mods

  # get all genes that aren't in gray mod
  selected_genes <- modules[modules$module %in% mods,'gene_name']

  # subset the TOM for umap
  # keep all genes as rows, and keep only hubs as cols (features)
  umap_TOM <- TOM[selected_genes,unlist(hub_list)]

  # run UMAP
  hub_umap <-  uwot::umap(
    X = umap_TOM,
    min_dist = min_dist,
    n_neighbors= n_neighbors,
    metric = metric,
    spread=spread,
    ...
  )

  # set up plotting df
  plot_df <- as.data.frame(hub_umap)
  colnames(plot_df) <- c("UMAP1", "UMAP2")
  plot_df$gene <- rownames(umap_TOM)

  # add module color, and hub gene status to the plotting df:
  ix <- match(plot_df$gene, modules$gene_name)
  plot_df$module <- modules$module[ix]
  plot_df$color <- modules$color[ix]
  plot_df$hub <- ifelse(
    plot_df$gene %in% as.character(unlist(hub_list)), 'hub', 'other'
  )

  # get kME values for each gene
  kMEs <- do.call(rbind, lapply(mods, function(cur_mod){
    cur <- subset(modules, module == cur_mod)
    cur <- cur[,c('gene_name', paste0('kME_', cur_mod))]
    colnames(cur) <- c('gene_name', 'kME')

    # scale kMEs between 0 & 1:
    cur$kME <- scale01(cur$kME)
    cur
  }))
  ix <- kMEs$gene_name[match(plot_df$gene, kMEs$gene_name)]
  plot_df$kME <- kMEs[ix, 'kME']

  # add the UMAP to the Seurat object:
  seurat_obj <- SetModuleUMAP(seurat_obj, plot_df, wgcna_name)

  seurat_obj
}
+16 −0
Original line number Diff line number Diff line
@@ -634,6 +634,22 @@ GetMotifOverlap <- function(seurat_obj, wgcna_name=NULL){
  seurat_obj@misc[[wgcna_name]]$motif_module_overlaps
}

############################
# ModuleUMAP
###########################

SetModuleUMAP <- function(seurat_obj, umap_df, wgcna_name=NULL){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
  seurat_obj@misc[[wgcna_name]]$module_umap <- umap_df
  seurat_obj
}

GetModuleUMAP <- function(seurat_obj, wgcna_name=NULL){
  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
  seurat_obj@misc[[wgcna_name]]$module_umap
}



############################
+131 −0
Original line number Diff line number Diff line
@@ -869,6 +869,137 @@ HubGeneNetworkPlot <- function(
}


#' ModuleUMAPPlot
#'
#' Makes a igraph network plot using the module UMAP
#'
#' @param seurat_obj A Seurat object
#' @param
#' @param
#' @param
#' @param
#' @param
#' @keywords scRNA-seq
#' @export
#' @examples
#' ModuleUMAPPlot
ModuleUMAPPlot <- function(
  seurat_obj,
  edge.alpha=0.25,
  vertex.label.cex=0.5, hub.vertex.size=4,
  other.vertex.size=1, repulse.exp=3,
  harmonized=TRUE,
  wgcna_name=NULL,
  return_graph = FALSE, # this returns the igraph object instead of plotting
  ...
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get the TOM
  TOM <- GetTOM(seurat_obj, wgcna_name)

  # get modules, MEs:
  MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
  modules <- GetModules(seurat_obj, wgcna_name)

  # get the UMAP df:
  umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
  mods <- levels(umap_df$modules)

  # subset module df by genes in the UMAP df:
  selected_modules <- modules[umap_df$gene,]
  selected_modules <- cbind(selected_modules, umap_df[,c('UMAP1', 'UMAP2', 'hub', 'kME')])

  # subset the TOM:
  subset_TOM <- TOM[umap_df$gene, umap_df$gene[umap_df$hub == 'hub']]

  # labels
  selected_modules$label <- ifelse(selected_modules$hub == 'hub', as.character(selected_modules$gene_name), '')
  selected_modules$fontcolor <- ifelse(selected_modules$color == 'black', 'gray50', 'black')


  # make sure all nodes have at least one edge!!
  edge_cutoff <- min(sapply(1:nrow(subset_TOM), function(i){max(subset_TOM[i,])}))
  edge_df <- subset_TOM %>% melt %>% subset(value >= edge_cutoff)
  print(dim(edge_df))

  # scale edge values between 0 and 1
  edge_df$value <- scale01(edge_df$value)


  # set color of each edge based on value:
  edge_df$color <- future.apply::future_sapply(1:nrow(edge_df), function(i){
    gene1 = as.character(edge_df[i,'Var1'])
    gene2 = as.character(edge_df[i,'Var2'])

    col1 <- modules %>% subset(gene_name == gene1) %>% .$color
    col2 <- modules %>% subset(gene_name == gene2) %>% .$color

    if(col1 == col2){
      col = col1
    } else{
      col = 'gray90'
    }
    col
  })


  # edges & vertices are plotted in igraph starting with the first row, so re-order s.t. strong edges are on bottom, all gray on the top of the table:
  edge_df <- edge_df %>% arrange(value)
  edge_df <- rbind(
    subset(edge_df, color == 'grey90'),
    subset(edge_df, color != 'grey90')
  )
  head(edge_df)


  # set alpha of edges
  edge_df$color <- sapply(1:nrow(edge_df), function(i){
    a = edge_df$value[i]
    #if(edge_df$value[i] < 0.5){a=0.5}
    alpha(edge_df$color[i], alpha=a)
  })

  # re-order vertices so hubs are plotted on top
  selected_modules <- rbind(
    subset(selected_modules , hub == 'other'),
    subset(selected_modules , hub != 'other')
  )

  # setup igraph:
  g <- igraph::graph_from_data_frame(
    edge_df,
    directed=FALSE,
    vertices=selected_modules
  )

  if(return_graph){return(g)}

  plot(
    g,
    layout=  as.matrix(selected_modules[,c('UMAP1', 'UMAP2')]),
    edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
    vertex.size=V(g)$kME * 3,
    edge.curved=0,
    edge.width=0.5,
    vertex.color=V(g)$color,
    # vertex.frame.color=V(g)$color,
  #  vertex.label=V(g)$label,
    vertex.label="",
    vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
    vertex.label.font = 3,
    vertex.label.color = V(g)$fontcolor,
    vertex.label.cex=0,
    vertex.frame.color=ifelse(
      V(g)$geneset == 'hub',
      'black', V(g)$color
    )
  )

}


#' OverlapDotPlot
#'
#' Makes barplots from Enrichr data
+89 −142
Original line number Diff line number Diff line
@@ -26,7 +26,7 @@ enableWGCNAThreads(nThreads = 8)
set.seed(2021)
colfunc <- colorRampPalette(rev(brewer.pal(11, 'Spectral' )))
theme_set(theme_cowplot())
# scp ../../pipelines/scWGCNA/R/* hpc3:/dfs3b/swaruplab/smorabit/collab/woodlab/cocaine_mouse_2021/Nurr2c_vs_GFP/scWGCNA/bin/
# scp scp R/* hpc3:/dfs3b/swaruplab/smorabit/analysis/scWGCNA/bin/
setwd("/dfs3b/swaruplab/smorabit/collab/woodlab/cocaine_mouse_2021/Nurr2c_vs_GFP/scWGCNA")

# source all of the scWGCNA scripts:
@@ -890,6 +890,12 @@ Hub gene network plot using UMAP embedding, similar to Pando

* label the top 5 hubs of each module

* RunModuleUMAP
* ModuleUMAPPlot
  - add options to label genes
  - Change how edges are selected. Add an option for edge selection?
  - Add an option to return the igraph object instead of plotting.

```{r eval=FALSE}


@@ -899,164 +905,38 @@ availableCores()
plan(multicore, workers=8)
options(future.globals.maxSize= 16*1024^3 )




################################################################################
# Focus on the hub genes:
################################################################################

# get the TOM
TOM <- GetTOM(cur_seurat)

# get modules, MEs:
MEs <- GetMEs(cur_seurat)
modules <- GetModules(cur_seurat)
mods <- levels(modules$module)
mods <- mods[mods != 'grey']

n_hubs <- 50
n_other <- 25

# get hub genes:
hub_list <- lapply(mods, function(cur_mod){
  cur <- subset(modules, module == cur_mod)
  cur[,c('gene_name', paste0('kME_', cur_mod))] %>%
    top_n(n_hubs) %>% .$gene_name
})
names(hub_list) <- mods

# sample the same number of genes in each module
other_genes <- modules %>%
  subset(!(gene_name %in% unlist(hub_list))) %>%
  group_by(module) %>%
  sample_n(n_other, replace=TRUE) %>%
  .$gene_name %>% unique

# subset TOM by these genes:
gene_list <- c(as.character(unlist(hub_list)), other_genes)
cur_TOM <- TOM[gene_list,gene_list]

# alternatively, keep all genes as rows, and keep only hubs as cols (features)
cur_TOM <- TOM[,unlist(hub_list)]

# run UMAP
hub_umap <- uwot::umap(cur_TOM)

# set up plotting df
plot_df <- as.data.frame(hub_umap)
colnames(plot_df) <- c("UMAP1", "UMAP2")
plot_df$gene <- rownames(cur_TOM)

# add module color, and hub gene status to the plotting df:

ix <- match(plot_df$gene, modules$gene_name)
plot_df$module <- modules$module[ix]
plot_df$color <- modules$color[ix]
plot_df$hub <- ifelse(
  plot_df$gene %in% as.character(unlist(hub_list)), 'hub', 'other'
)
plot_df$size <- ifelse(
  plot_df$hub == 'hub', 2,1
cur_seurat <- RunModuleUMAP(
  cur_seurat,
  n_hubs = 25,
  exclude_grey = TRUE,
  min_dist=0.4
)

plot_df <- GetModuleUMAP(cur_seurat)

p <- ggplot(plot_df, aes(x=UMAP1, y=UMAP2)) +
     geom_point(color=plot_df$color, size=plot_df$size) +
     geom_point(color=plot_df$color, size=plot_df$kME*2) +
     umap_theme

pdf(paste0(fig_dir, 'test_hubgene_umap2.pdf'), width=6, height=6, useDingbats=FALSE)
pdf(paste0(fig_dir, 'test_hubgene_umap3.pdf'), width=6, height=6, useDingbats=FALSE)
p
dev.off()

library(reshape2)
library(igraph)

################################################################################
# all the genes (only take a few from the grey mod)
################################################################################

# get the TOM
TOM <- GetTOM(cur_seurat)

# get modules, MEs:
MEs <- GetMEs(cur_seurat)
modules <- GetModules(cur_seurat)
mods <- levels(modules$module)
mods <- mods[mods != 'grey']

n_hubs <- 25

# get hub genes:
hub_list <- lapply(mods, function(cur_mod){
  cur <- subset(modules, module == cur_mod)
  cur[,c('gene_name', paste0('kME_', cur_mod))] %>%
    top_n(n_hubs) %>% .$gene_name
})
names(hub_list) <- mods

# get all genes that aren't in gray mod
selected_genes <- modules[modules$module %in% mods,'gene_name']


# subset the TOM for umap
# keep all genes as rows, and keep only hubs as cols (features)
umap_TOM <- TOM[selected_genes,unlist(hub_list)]

# run UMAP
hub_umap <-  uwot::umap(
  X = umap_TOM,
  n_neighbors= 25,
  n_components=2,
  metric = "cosine",
  spread=1,
  min_dist=0.4
)

# set up plotting df
plot_df <- as.data.frame(hub_umap)
colnames(plot_df) <- c("UMAP1", "UMAP2")
plot_df$gene <- rownames(cur_TOM)

# add module color, and hub gene status to the plotting df:
ix <- match(plot_df$gene, modules$gene_name)
plot_df$module <- modules$module[ix]
plot_df$color <- modules$color[ix]
plot_df$hub <- ifelse(
  plot_df$gene %in% as.character(unlist(hub_list)), 'hub', 'other'
)

# size based on hub status
plot_df$size <- ifelse(
  plot_df$hub == 'hub', 2,1
pdf(paste0(fig_dir, 'test_hubgene_umap_igraph2.pdf'), width=6, height=6, useDingbats=FALSE)
ModuleUMAPPlot(
  cur_seurat
)

# get kME values for each gene
kMEs <- do.call(rbind, lapply(mods, function(cur_mod){
  cur <- subset(modules, module == cur_mod)
  cur <- cur[,c('gene_name', paste0('kME_', cur_mod))]
  colnames(cur) <- c('gene_name', 'kME')

  # scale kMEs between 0 & 1:
  cur$kME <- scale01(cur$kME)
  cur
}))

ix <- kMEs$gene_name[match(plot_df$gene, kMEs$gene_name)]
plot_df$kME <- kMEs[ix, 'kME']
dev.off()


p <- ggplot(plot_df, aes(x=UMAP1, y=UMAP2)) +
     geom_point(color=plot_df$color, size=plot_df$kME*2) +
     umap_theme

pdf(paste0(fig_dir, 'test_hubgene_umap2.pdf'), width=5, height=5, useDingbats=FALSE)
p
dev.off()
g <- ModuleUMAPPlot(cur_seurat, return_graph=TRUE)

################################################################################
# plot in igraph:
################################################################################


# function args
edge.alpha=0.15
vertex.label.cex=0.5
@@ -1066,6 +946,7 @@ repulse.exp=3

# subset modules for these genes:
selected_modules <- modules %>% subset(gene_name %in% selected_genes)

# subset_TOM <- TOM[selected_genes,selected_genes]
subset_TOM <- umap_TOM

@@ -1176,6 +1057,72 @@ pdf(paste0(fig_dir, 'test_hubgene_umap2_igraph.pdf'), width=9, height=9, useDing
  dev.off()


################################################################################
# Randomly sample 20% of edges (for making a logo)
################################################################################

edge_df <- edge_df[sample(rownames(edge_df), round(nrow(edge_df)*0.2)),]

edge_df <- edge_df %>% arrange(value)
edge_df <- rbind(
  subset(edge_df, color == 'grey90'),
  subset(edge_df, color != 'grey90')
)
head(edge_df)

# set alpha of edges
edge_df$color <- sapply(1:nrow(edge_df), function(i){
  a = edge_df$value[i]
  #if(edge_df$value[i] < 0.5){a=0.5}
  alpha(edge_df$color[i], alpha=a)
})

# re-order vertices so hubs are plotted on top
selected_modules <- rbind(
  subset(selected_modules , geneset == 'other'),
  subset(selected_modules , geneset != 'other')
)

# make black into a dark grey
selected_modules$color <- ifelse(
  selected_modules$color == 'black',
  'grey50',
  selected_modules$color
)

# setup igraph:
g <- igraph::graph_from_data_frame(
  edge_df,
  directed=FALSE,
  vertices=selected_modules
)
head(E(g)$value)
head(E(g)$color)

pdf(paste0(fig_dir, 'test_hubgene_umap2_igraph_logo.pdf'), width=9, height=9, useDingbats=FALSE)

  plot(
    g,
    layout=  as.matrix(selected_modules[,c('UMAP1', 'UMAP2')]),
    edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
    #edge.color=E(g)$color,
    #vertex.size=V(g)$size,
    vertex.size=V(g)$kME * 3,
    edge.curved=0,
    edge.width=0.5,
    vertex.color=V(g)$color,
    vertex.frame.color=V(g)$color,
    vertex.label="",
    vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
    vertex.label.font = 3,
    vertex.label.color = V(g)$fontcolor,
    vertex.label.cex=0,
  )

  dev.off()



```