Commit 23890a5c authored by smorabit's avatar smorabit
Browse files

MotifScan, Overlap Motifs

parent 93fd73d5
Loading
Loading
Loading
Loading
+12 −9
Original line number Diff line number Diff line
@@ -8,9 +8,10 @@
#' @export
#' @examples
#' NormalizeMetadata
NormalizeMetacells <- function(seurat_obj, ...){
  seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj <- Seurat::NormalizeData(seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj, ...)
  seurat_obj
NormalizeMetacells <- function(seurat_obj, wgcna_name=NULL, ...){
  metacell_obj <- GetMetacellObject(seurat_obj, wgcna_name)
  metacell_obj <- Seurat::NormalizeData(metacell_obj, ...)
  SetMetacellObject(seurat_obj, metacell_obj, wgcna_name)
}

#' ScaleMetacells
@@ -22,12 +23,13 @@ NormalizeMetacells <- function(seurat_obj, ...){
#' @export
#' @examples
#' ScaleMetadata
ScaleMetacells <- function(seurat_obj, ...){
ScaleMetacells <- function(seurat_obj, wgcna_name=NULL, ...){
  if(!exists("features")){
    features = VariableFeatures(seurat_obj)
  }
  seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj <- Seurat::ScaleData(seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj, ...)
  seurat_obj
  metacell_obj <- GetMetacellObject(seurat_obj, wgcna_name)
  metacell_obj <- Seurat::ScaleData(metacell_obj, ...)
  SetMetacellObject(seurat_obj, metacell_obj, wgcna_name)
}

#' RunPCAMetacells
@@ -39,9 +41,10 @@ ScaleMetacells <- function(seurat_obj, ...){
#' @export
#' @examples
#' NormalizeMetadata
RunPCAMetacells <- function(seurat_obj, ...){
  seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj <- Seurat::RunPCA(seurat_obj@misc[[seurat_obj@misc$active_wgcna]]$wgcna_metacell_obj, ...)
  seurat_obj
RunPCAMetacells <- function(seurat_obj, wgcna_name=NULL, ...){
  metacell_obj <- GetMetacellObject(seurat_obj, wgcna_name)
  metacell_obj <- Seurat::RunPCA(metacell_obj, ...)
  SetMetacellObject(seurat_obj, metacell_obj, wgcna_name)
}

#' RunHarmonyMetacells
+205 −23
Original line number Diff line number Diff line
@@ -11,7 +11,12 @@
#' @export
#' @examples
#' SelectNetworkGenes(pbmc)
SelectNetworkGenes <- function(seurat_obj, gene_select="variable", fraction=0.05, gene_list=NULL, wgcna_name=NULL){
SelectNetworkGenes <- function(
  seurat_obj,
  gene_select="variable", fraction=0.05,
  group.by=NULL, # should be a column in the Seurat object, eg clusters
  gene_list=NULL, wgcna_name=NULL
 ){

  # validate inputs:
  if(!(gene_select %in% c("variable", "fraction", "all", "custom"))){
@@ -30,9 +35,27 @@ SelectNetworkGenes <- function(seurat_obj, gene_select="variable", fraction=0.05
    expr_mat <- GetAssayData(seurat_obj, slot='counts')
    expr_mat[expr_mat > 0] <- 1

    group_gene_list <- list()
    if(!is.null(group.by)){
      # identify genes that are expressed
      print(group.by)
      groups <- unique(seurat_obj@meta.data[[group.by]])
      for(cur_group in groups){

        # subset expression matrix by this group
        cur_expr <- expr_mat[,seurat_obj@meta.data[[group.by]] == cur_group]
        print(dim(cur_expr))

        gene_filter <- rowSums(cur_expr) >= round(fraction*ncol(cur_expr));
        group_gene_list[[cur_group]] <- rownames(seurat_obj)[gene_filter]
      }
      gene_list <- unique(unlist(group_gene_list))

    } else{
      # identify genes that are expressed in at least some fraction of cells
      gene_filter <- rowSums(expr_mat) >= round(fraction*ncol(seurat_obj));
      gene_list <- rownames(seurat_obj)[gene_filter]
    }

  } else if(gene_select == "variable"){
    gene_list <- VariableFeatures(seurat_obj)
@@ -123,12 +146,13 @@ TestSoftPowers <- function(
  seurat_obj,
  use_metacells = TRUE,
  group.by=NULL, group_name=NULL,
  setDatExpr = TRUE,
  powers=c(seq(1,10,by=1), seq(12,30, by=2)),
  make_plot=TRUE, outfile="softpower.pdf", figsize=c(7,7)
){

  # add datExpr if not already added:
  if(!("datExpr" %in% names(GetActiveWGCNA(seurat_obj)))){
  if(!("datExpr" %in% names(GetActiveWGCNA(seurat_obj))) | setDatExpr == TRUE){
    seurat_obj <- SetDatExpr(seurat_obj, use_metacells, group.by=group.by, group_name=group_name)
  }
  # get datExpr
@@ -200,7 +224,8 @@ TestSoftPowers <- function(
#' @examples
#' ConstructNetwork(pbmc)
ConstructNetwork <- function(
  seurat_obj, soft_power=NULL, use_metacells=TRUE, group.by=NULL, group_name=NULL,
  seurat_obj, soft_power=NULL, use_metacells=TRUE,
  setDatExpr=TRUE, group.by=NULL, group_name=NULL,
  tom_outdir="TOM",
  blocks=NULL, maxBlockSize=30000, randomSeed=12345, corType="pearson",
  consensusQuantile=0.3, networkType = "signed", TOMType = "unsigned",
@@ -212,7 +237,7 @@ ConstructNetwork <- function(
){

  # add datExpr if not already added:
  if(!("datExpr" %in% names(GetActiveWGCNA(seurat_obj)))){
  if(!("datExpr" %in% names(GetActiveWGCNA(seurat_obj))) | setDatExpr == TRUE){
    seurat_obj <- SetDatExpr(seurat_obj, use_metacells, group.by=group.by, group_name=group_name)
  }

@@ -702,7 +727,7 @@ OverlapModulesDEGs <- function(
  cell_groups <- deg_df$group %>% unique

  # get modules,
  modules <- GetModules(cur_seurat)
  modules <- GetModules(seurat_obj)
  mods <- levels(modules$module)
  mods <- mods[mods != 'grey']

@@ -720,17 +745,6 @@ OverlapModulesDEGs <- function(
  # size of genome based on # genes in Seurat object:
  genome.size <- nrow(seurat_obj)

  # compute overlap:
  cur_overlap <- GeneOverlap::testGeneOverlap(
    GeneOverlap::newGeneOverlap(
      cur_module_genes,
      cur_DEGs,
      genome.size=genome.size
  ))
  or <- cur_overlap@odds.ratio
  pval <- cur_overlap@pval
  jaccard <- cur_overlap@Jaccard

  # run overlaps between module gene lists and DEG lists:
  overlap_df <- do.call(rbind, lapply(mods, function(cur_mod){
    cur_module_genes <- modules %>% subset(module == cur_mod) %>% .$gene_name
@@ -741,9 +755,9 @@ OverlapModulesDEGs <- function(
          cur_DEGs,
          genome.size=genome.size
      ))
      c(cur_overlap@odds.ratio, cur_overlap@pval, cur_overlap@Jaccard)
      c(cur_overlap@odds.ratio, cur_overlap@pval, cur_overlap@Jaccard, length(cur_overlap@intersection))
    })) %>% as.data.frame
    colnames(cur_overlap_df) <- c('odds_ratio', 'pval', 'Jaccard')
    colnames(cur_overlap_df) <- c('odds_ratio', 'pval', 'Jaccard', 'size_intersection')
    cur_overlap_df$module <- cur_mod
    cur_overlap_df$group <- cell_groups

@@ -766,7 +780,7 @@ OverlapModulesDEGs <- function(
  overlap_df$module <- factor(overlap_df$module, levels=mods)

  # re-arrange columns:
  overlap_df <- overlap_df %>% select(c(module, group, color, odds_ratio, pval, fdr, Significance, Jaccard))
  overlap_df <- overlap_df %>% dplyr::select(c(module, group, color, odds_ratio, pval, fdr, Significance, Jaccard, size_intersection))

  overlap_df
}
@@ -1081,3 +1095,171 @@ ComputeROC <- function(

  out
}


#' Scan gene promoters for a set of TF PWMs
#'
#'
#' @keywords scRNA-seq
#' @export
#' @examples
#' MotifScan
MotifScan <- function(
  seurat_obj,
  species_genome, # hg38, mm10, etc...
  pfm, # matrix set from JASPAR2020 for example
  EnsDb, # Ensembl database such as EnsDb.Mmusculus.v79
  wgcna_name=NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get a dataframe of just the motif name and the motif ID:
  motif_df <- data.frame(
    motif_name = purrr::map(1:length(pfm), function(i){pfm[[i]]@name}) %>% unlist,
    motif_ID = purrr::map(1:length(pfm), function(i){pfm[[i]]@ID}) %>% unlist
  )

  # get promoters of protein coding genes from the given Ensdb
  # note: everything breaks if I try to use X & Y chromosomes.
  gene.promoters <- ensembldb::promoters(EnsDb, filter = ~ gene_biotype == "protein_coding") %>%
    subset(seqnames %in% c(1:100))
  gene.coords <- ensembldb::genes(EnsDb, filter = ~ gene_biotype == "protein_coding") %>%
    subset(seqnames %in% c(1:100))

  # add the gene name to the promoter object
  gene.promoters$symbol <- gene.coords$symbol[match(gene.promoters$gene_id, names(gene.coords))]

  # drop unnecessary chromosomes
  gene.promoters <- keepSeqlevels(gene.promoters, value= levels(droplevels(seqnames(gene.promoters))))

  # rename seqlevels to add 'chr', remove X&Y chromosomes because they break everything
  old_levels <- levels(seqnames(gene.promoters))
  new_levels <- ifelse(old_levels %in% c('X', 'Y'), old_levels, paste0('chr', old_levels))
  gene.promoters <- renameSeqlevels(gene.promoters, new_levels)

  # set the genome (not sure if we NEED to do this...)
  genome(seqinfo(gene.promoters)) <- species_genome

  # set up promoters object that only has the necessary info for motifmatchr
  my_promoters <- GRanges(
    seqnames =  droplevels(seqnames(gene.promoters)),
    IRanges(
      start = start(gene.promoters),
      end = end(gene.promoters)
    ),
    symbol = gene.promoters$symbol,
    genome=species_genome
  )

  # scan these promoters for motifs:
  print('Matching motifs...')
  motif_ix <- motifmatchr::matchMotifs(pfm, my_promoters, genome=species_genome)

  # get the matches
  tf_match <- motifmatchr::motifMatches(motif_ix)
  rownames(tf_match) <- my_promoters$symbol

  # use motif names as the column names:
  colnames(tf_match) <- motif_df$motif_name

  # only keep genes that are in the Seurat object and in the given EnsDb:
  gene_list <- rownames(seurat_obj)
  gene_list <- gene_list[gene_list %in% rownames(tf_match)]
  tf_match <- tf_match[gene_list,]

  # get list of target genes for each TF:
  print('Getting TF target genes...')
  tfs <- motif_df$motif_name
  tf_targets <- list()
  n_targets <- list()
  for(cur_tf in tfs){
    tf_targets[[cur_tf]] <- names(tf_match[,cur_tf][tf_match[,cur_tf]])
    n_targets[[cur_tf]] <- length(tf_targets[[cur_tf]] )
  }
  n_targets <- unlist(n_targets)

  # add number of target genes to motif df
  motif_df$n_targets <- n_targets

  # add info to seurat object
  seurat_obj <- SetMotifMatrix(seurat_obj, tf_match)
  seurat_obj <- SetMotifs(seurat_obj, motif_df)
  seurat_obj <- SetMotifTargets(seurat_obj, tf_targets)
  seurat_obj <- SetPFMList(seurat_obj, pfm)

  seurat_obj
}


#' Overlap modules with TF target genes
#'
#' @param seurat_obj A Seurat object
#' @param wgcna_name
#' @keywords scRNA-seq
#' @export
#' @examples
#' OverlapModulesDEGs
OverlapModulesMotifs <- function(
  seurat_obj, wgcna_name = NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get modules
  modules <- GetModules(seurat_obj, wgcna_name)
  mods <- levels(modules$module)
  mods <- mods[mods != 'grey']

  # get tf info
  tf_match <- GetMotifMatrix(seurat_obj)
  tf_targets <- GetMotifTargets(seurat_obj)
  motif_df <- GetMotifs(seurat_obj)

  # size of genome based on # genes in Seurat object:
  genome.size <- nrow(seurat_obj)

  cur_mod <- mods[1]
  cur_module_genes <- modules %>% subset(module == cur_mod) %>% .$gene_name
  cur_targets <- as.character(unlist(tf_targets[1]))

  overlap_df <- do.call(rbind, lapply(mods, function(cur_mod){
    cur_module_genes <- modules %>% subset(module == cur_mod) %>% .$gene_name
    cur_overlap_df <- do.call(rbind, lapply(tf_targets, function(cur_targets){
      cur_overlap <- testGeneOverlap(newGeneOverlap(
          cur_module_genes,
          as.character(unlist(cur_targets)),
          genome.size=genome.size
      ))
      c(cur_overlap@odds.ratio, cur_overlap@pval, cur_overlap@Jaccard, length(cur_overlap@intersection))
    })) %>% as.data.frame

    colnames(cur_overlap_df) <- c('odds_ratio', 'pval', 'Jaccard', 'size_intersection')
    cur_overlap_df$module <- cur_mod
    cur_overlap_df$tf <- names(tf_targets)

    # module color:
    cur_overlap_df$color <- modules %>% subset(module == cur_mod) %>% .$color %>% unique
    cur_overlap_df
  }))

  # adjust for multiple comparisons:
  overlap_df$fdr <- p.adjust(overlap_df$pval, method='fdr')

  # significance level:
  overlap_df$Significance <- gtools::stars.pval(overlap_df$fdr)
  overlap_df$Significance <- ifelse(
    overlap_df$Significance == '.', '',
    overlap_df$Significance
  )

  # set factor levels for modules:
  overlap_df$module <- factor(overlap_df$module, levels=mods)

  # re-arrange columns:
  overlap_df <- overlap_df %>% dplyr::select(c(module, tf, color, odds_ratio, pval, fdr, Significance, Jaccard, size_intersection))

  # add overlap df to Seurat obj
  seruat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)

}
+99 −12
Original line number Diff line number Diff line
@@ -402,6 +402,99 @@ GetROCData <- function(seurat_obj, wgcna_name=NULL){
}


############################
# TF Match Matrix (not stored within the WGCNA slot)
###########################

SetMotifMatrix <- function(seurat_obj, tf_match){

  # make a spot for the motif info if it's not already there:
  if(is.null(seurat_obj@misc$motifs)){
    seurat_obj@misc$motifs <- list()
  }
  seurat_obj@misc$motifs$tf_match_matrix <- tf_match
  seurat_obj
}

GetMotifMatrix <- function(seurat_obj){
  seurat_obj@misc$motifs$tf_match_matrix
}


############################
# Motif table
###########################

SetMotifs <- function(seurat_obj, motif_df){

  # make a spot for the motif info if it's not already there:
  if(is.null(seurat_obj@misc$motifs)){
    seurat_obj@misc$motifs <- list()
  }
  seurat_obj@misc$motifs$motif_df <- motif_df
  seurat_obj
}

GetMotifs <- function(seurat_obj){
  seurat_obj@misc$motifs$motif_df
}

############################
# PFM List
###########################

SetPFMList <- function(seurat_obj, pfm_list){

  # make a spot for the motif info if it's not already there:
  if(is.null(seurat_obj@misc$motifs)){
    seurat_obj@misc$motifs <- list()
  }
  seurat_obj@misc$motifs$pfm_list <- pfm_list
  seurat_obj
}

GetPFMList <- function(seurat_obj){
  seurat_obj@misc$motifs$pfm_list
}


############################
# TF Target Genes:
###########################

SetMotifTargets <- function(seurat_obj, motif_targets){

  # make a spot for the motif info if it's not already there:
  if(is.null(seurat_obj@misc$motifs)){
    seurat_obj@misc$motifs <- list()
  }
  seurat_obj@misc$motifs$motif_targets <- motif_targets
  seurat_obj
}

GetMotifTargets <- function(seurat_obj){
  seurat_obj@misc$motifs$motif_targets
}


############################
# motif overlap
###########################

SetMotifOverlap <- function(seurat_obj, overlap_df, wgcna_name=NULL){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
  seurat_obj@misc[[wgcna_name]]$motif_module_overlaps <- overlap_df
  seurat_obj
}

GetMotifOverlap <- function(seurat_obj, wgcna_name=NULL){
  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}
  seurat_obj@misc[[wgcna_name]]$motif_module_overlaps
}



############################
# Reset module names:
###########################
@@ -412,10 +505,6 @@ ResetModuleNames <- function(
  wgcna_name=NULL
){

  #TODO:
  # only re-name things if they exist. For example, skip the avg mod exp step
  # if we never ran it!

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get modules
@@ -434,8 +523,6 @@ ResetModuleNames <- function(
    new_names <- c(new_names[1:(grey_ind-1)], 'grey', new_names[grey_ind:length(new_names)])
  }

  print('here')

  # update kMEs
  new_kMEs <- paste0('kME_', new_names)
  colnames(modules) <- c(colnames(modules)[1:3], new_kMEs)
@@ -456,21 +543,21 @@ ResetModuleNames <- function(

  # update hME table:
  hMEs <- GetMEs(seurat_obj, harmonized=TRUE, wgcna_name)
  if(!is.na(hMEs)){
  if(!is.null(hMEs)){
    colnames(hMEs) <- new_mod_df$new
    seurat_obj <- SetMEs(seurat_obj, hMEs, harmonized=TRUE, wgcna_name)
  }

  # update ME table
  MEs <- GetMEs(seurat_obj, harmonized=FALSE, wgcna_name)
  if(!is.na(MEs)){
  if(!is.null(MEs)){
    colnames(MEs) <- new_mod_df$new
    seurat_obj <- SetMEs(seurat_obj, MEs, harmonized=FALSE, wgcna_name)
  }

  # update module scores:
  module_scores <- GetModuleScores(seurat_obj, wgcna_name)
  if(!is.na(module_scores)){
  if(!is.null(module_scores)){
    if(!("grey" %in% colnames(module_scores))){
      colnames(module_scores) <- new_mod_df$new[new_mod_df$new != 'grey']
    } else {
@@ -481,7 +568,7 @@ ResetModuleNames <- function(

  # update average module expression:
  avg_exp <- GetAvgModuleExpr(seurat_obj, wgcna_name)
  if(!is.na(avg_exp)){
  if(!is.null(avg_exp)){
    if(!("grey" %in% colnames(avg_exp))){
      colnames(avg_exp) <- new_mod_df$new[new_mod_df$new != 'grey']
    } else {
@@ -492,7 +579,7 @@ ResetModuleNames <- function(

  # update enrichr table:
  enrich_table <- GetEnrichrTable(seurat_obj, wgcna_name)
  if(!is.na(enrich_table)){
  if(!is.null(enrich_table)){
    enrich_table$module <- factor(
      new_mod_df[match(enrich_table$module, new_mod_df$old),'new'],
      levels = as.character(new_mod_df$new)
@@ -503,7 +590,7 @@ ResetModuleNames <- function(
  # update ROC info:
  # THIS DOES NOT UPDATE THE ROC OBJECTS THEMSELVES!!!
  roc_data <- GetROCData(seurat_obj, wgcna_name)
  if(!is.na(roc_data)){
  if(!is.null(roc_data)){
    roc_data$roc$module <- factor(
      new_mod_df[match(roc_data$roc$module, new_mod_df$old),'new'],
      levels = as.character(new_mod_df$new)
+100 −0
Original line number Diff line number Diff line
@@ -1045,3 +1045,103 @@ ROCCurves <- function(
  p

}



#' Displays the top n TFs in a set of modules as a bar plot
#'
#' @param seurat_obj A Seurat object
#' @param wgcna_name
#' @keywords scRNA-seq
#' @export
#' @examples
#' MotifOverlapBarPlot
MotifOverlapBarPlot <- function(
  seurat_obj,
  n_tfs = 10,
  plot_size = c(5,6),
  outdir = "MotifOverlaps/",
  motif_font = 'helvetica_regular',
  module_names = NULL,
  wgcna_name=NULL
){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # make output dir if it doesn't exist:
  if(!dir.exists(outdir)){dir.create(outdir)}

  # get Modules
  modules <- GetModules(seurat_obj)
  mods <- levels(modules$module)
  mods <- mods[mods != 'grey']

  if(is.null(module_names)){module_names <- mods}

  # get overlap info from Seurat obj
  overlap_df <- GetMotifOverlap(seurat_obj, wgcna_name)
  motif_df <- GetMotifs(seurat_obj)

  # get pfm list from Seurat obj
  pfm <- GetPFMList(seurat_obj)

  # add motif ID to the overlap_df
  overlap_df$motif_ID <- motif_df$motif_ID[match(overlap_df$tf, motif_df$motif_name)]

  # subset by the modules that we are using:
  overlap_df <- overlap_df %>% subset(module %in% module_names)

  for(cur_mod in module_names){
    print(cur_mod)
    # get df for cur mod
    plot_df <- overlap_df %>% subset(module == cur_mod) %>% top_n(n_tfs, wt=odds_ratio) %>%
      arrange(desc(odds_ratio))

    # make the barplot
    p1 <- plot_df %>% ggplot(aes(y=reorder(tf, odds_ratio), fill=odds_ratio, x=odds_ratio)) +
      geom_bar(stat='identity', width=0.7) + NoLegend() +
      scale_fill_gradient(high=unique(plot_df$color), low='grey90') +
      ylab('') + theme(
        axis.line.y = element_blank(),
        axis.text.y = element_blank(),
        plot.margin = margin(
          t = 0, r = 0, b = 0, l = 0
        )
      )

    # make the motif logo plots:
    plot_list <- list()
    for(i in 1:nrow(plot_df)){
      cur_id <- plot_df[i,'motif_ID']
      cur_name <- plot_df[i,'tf']
      plot_list[[cur_id]] <- ggplot() +
        ggseqlogo::geom_logo( as.matrix(pfm[[cur_id]]), font=motif_font) +
        ggseqlogo::theme_logo() +
        xlab('') + ylab(cur_name) + theme(
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.title.y = element_text(angle=0),
          plot.margin = margin(t = 0,  # Top margin
                                 r = 0,  # Right margin
                                 b = 0,  # Bottom margin
                                 l = 0) # Left margin
        )
    }

    # wrap the motif logos
    patch1 <- wrap_plots(plot_list, ncol=1)

    # assemble the plot with patchwork
    outplot <- (patch1 | p1) +
          plot_layout(ncol=2, widths=c(1,2)) +
          plot_annotation(title=paste0('Motif overlaps with ', cur_mod),
          theme = theme(plot.title=element_text(hjust=0.5))
        )

    pdf(paste0(outdir, '/', cur_mod, '_motif_overlaps.pdf'), width=plot_size[1], height=plot_size[2], useDingbats=FALSE)
    print(outplot)
    dev.off()

  }

}
+3 −592

File changed.

Preview size limit exceeded, changes collapsed.

Loading