Commit 77dbf97c authored by smorabit's avatar smorabit
Browse files

added tutorial dataset

parent 57abad16
Loading
Loading
Loading
Loading
+153 −0
Original line number Diff line number Diff line
@@ -2014,3 +2014,156 @@ PlotModuleTraitCorrelation <- function(
  }

}



ModuleTFNetwork <- function(
  seurat_obj,
  tf_name,
  tf_gene_name,
  edge.alpha = 0.75,
  cor_thresh =  0.25,
  high_color = 'red',
  mid_color = 'grey',
  low_color = 'blue',
  slot = 'data',
  size.scale = 30,
  wgcna_name = NULL

){

  if(is.null(wgcna_name)){wgcna_name <- seurat_obj@misc$active_wgcna}

  # get modules,
  modules <- GetModules(seurat_obj, wgcna_name) %>%
    subset(module != 'grey') %>%
    mutate(module = droplevels(module))
  MEs <- GetMEs(seurat_obj, TRUE, wgcna_name) %>% as.matrix
  MEs <- MEs[,colnames(MEs) != 'grey']
  mod_sizes <- table(modules$module)

  # correlation of MEs:
  module_cor <- Hmisc::rcorr(x=MEs, type='pearson')$r
  module_cor[lower.tri(module_cor)] <- NA
  module_cor <- reshape2::melt(module_cor) %>% na.omit
  module_cor <- subset(module_cor, abs(value) >= cor_thresh & Var1 != Var2)
  module_cor

  # correlation of modules with TF expression:
  cur_exp <- GetAssayData(seurat_obj, slot=slot)[tf_gene_name,]
  exp_cor <- Hmisc::rcorr(x=MEs, y=cur_exp)$r[1:ncol(MEs),'y']
  exp_cor <- data.frame(
    mod = names(exp_cor),
    value = as.numeric(exp_cor)
  )

  plot_lim <- abs(max(c(abs(range(exp_cor$value)), abs(range(module_cor$value)))))

  # make a dummy ggplot so I can get the colors:
  p <- ggplot(module_cor, aes(x=Var1, y=Var2, color=value)) +
    geom_point() +
    scale_color_gradient2(high=high_color, mid=mid_color, low=low_color, limits=c(-1*plot_lim, plot_lim))
  ggp <- ggplot_build(p)
  module_cor$color <- ggp$data[[1]]$colour

  p <- ggplot(exp_cor, aes(x=mod, y=mod, color=value)) +
    geom_point() +
    scale_color_gradient2(high=high_color, mid=mid_color, low=low_color, limits=c(-1*plot_lim, plot_lim))
  ggp <- ggplot_build(p)
  exp_cor$color <- ggp$data[[1]]$colour

  # get the UMAP df:
  umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
  mods <- levels(umap_df$modules)

  # get tf info
  tf_match <- GetMotifMatrix(seurat_obj)
  tf_targets <- GetMotifTargets(seurat_obj)
  motif_df <- GetMotifs(seurat_obj)
  overlap_df <- GetMotifOverlap(seurat_obj)

  # compute the UMAP centroids:
  centroid_df <-
    umap_df %>% dplyr::select(c(UMAP1, UMAP2, module)) %>%
    dplyr::group_by(module) %>%
    dplyr::summarise(x = mean(UMAP1), y = mean(UMAP2))

  # subset overlap_df by current TF:
  cur_overlap <- subset(overlap_df, tf == tf_name)

  # combine the two datasets:
  node_df <- dplyr::left_join(centroid_df, cur_overlap, by='module') %>%
    dplyr::rename(c(UMAP1=x, UMAP2=y, name=module))

  node_df$size <- as.numeric(node_df$size_intersection) / as.numeric(mod_sizes)

  # add info for tf to this table:
  tf_df <- umap_df[umap_df$gene == tf_gene_name, ] %>%
    dplyr::select(-c(hub, kME, module)) %>%
    dplyr::rename(name=gene)
  tf_df$size <- 0.25

  # set up edge df
  edge_df <- data.frame(
    Var1 = tf_gene_name,
    Var2 = node_df$name,
    value = node_df$odds_ratio,
    color = exp_cor$color
  )

  # set the edge color to grey if the overlap isn't significant:
  edge_df$color <- ifelse(cur_overlap$fdr <= 0.05, edge_df$color, 'grey')

  # set up node df
  node_df <- dplyr::bind_rows(node_df, tf_df) %>% as.data.frame()
  rownames(node_df) <- as.character(node_df$name)

  # set up directed edge df:
  g1 <- igraph::graph_from_data_frame(
    edge_df,
    directed=TRUE,
    vertices=node_df
  )

  # set up undirected net
  g2 <- igraph::graph_from_data_frame(
    module_cor,
    directed=FALSE,
    vertices=node_df
  )


  plot(
    g2,
    layout = as.matrix(node_df[,c('UMAP1', 'UMAP2')]),
    vertex.size=1,
    edge.curved=0,
    edge.width=1,
    vertex.color='grey',
    vertex.label='',
    edge.color=adjustcolor(E(g2)$color, alpha.f=edge.alpha),
  )

  plot(
    g1,
    layout = as.matrix(node_df[,c('UMAP1', 'UMAP2')]),
    edge.color=adjustcolor(E(g1)$color, alpha.f=edge.alpha),
    vertex.size=V(g1)$size * size.scale,
    edge.curved=0,
    edge.width=edge_df$value*2,
    vertex.color=V(g1)$color,
    vertex.label=V(g1)$name,
    vertex.label.dist=1.1,
    vertex.label.degree=-pi/4,
    vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
    vertex.label.font = 3,
    vertex.label.color = 'black',
    vertex.label.cex=0,
    vertex.frame.color='black',
    margin=0,
    edge.arrow.size=edge_df$value/2,
    add=TRUE
  )


}
+33 −27

File changed.

Preview size limit exceeded, changes collapsed.

+1 −0
Original line number Diff line number Diff line
@@ -165,6 +165,7 @@
<a href="module_trait_correlation.html">Module trait correlation</a><a class="anchor" aria-label="anchor" href="#module-trait-correlation"></a>
</h3>
<p>This tutorial covers how to correlate continuous and categorical variables with module eigengenes or module expression scores, revealing which modules are related to different experimental conditions or covariates.</p>
<p><img src="figures/mt_correlation/ME_Trait_correlation_fdr.png" width="400" height="400" href="module_trait_correlation.html"></p>
</div>
<div class="section level3">
<h3 id="enrichment-analysis">
+11 −2
Original line number Diff line number Diff line
@@ -36,6 +36,15 @@ the [Seurat Guided Clustering Tutorial](https://satijalab.org/seurat/articles/pb
Additionally, there are a lot of WGCNA-specific terminology and acronyms, which
are all clarified in [this table](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559/tables/1).

## Download the tutorial data

For the purpose of this tutorial, we provide a processed Seurat object of the
control human brains from the [Zhou et al 2020 study](https://www.nature.com/articles/s41591-019-0695-9).

```{bash eval=FALSE}
wget https://swaruplab.bio.uci.edu/public_data/Zhou_2020.rds
```

## Load the dataset and required libraries

First we will load the single-cell dataset and the required R libraries for this
@@ -62,11 +71,11 @@ theme_set(theme_cowplot())
set.seed(12345)

# load the Zhou et al snRNA-seq dataset
seurat_obj <- readRDS('data/Zhou_control.rds')
seurat_obj <- readRDS('Zhou_2020.rds')

```

Here we will plot the non-linear dimensionality reduction (UMAP) colored by cell
Here we will plot the UMAP colored by cell
type just to check that we have loaded the data correctly, and to make sure that
we have grouped cells into clusters and cell types.

+2 −0
Original line number Diff line number Diff line
@@ -41,6 +41,8 @@ This tutorial covers how to correlate continuous and categorical variables with
module eigengenes or module expression scores, revealing which modules are related
to different experimental conditions or covariates.

<img src="figures/mt_correlation/ME_Trait_correlation_fdr.png" width="400" height="400" href="module_trait_correlation.html">

### [Enrichment analysis](enrichment_analysis.html)

This tutorial shows how to use Enrichr to compare the gene members of each co-expression