Commit 51fcfaac authored by smorabit's avatar smorabit
Browse files

finished module umap functions

parent 0cad04fd
Loading
Loading
Loading
Loading
+5 −7
Original line number Diff line number Diff line
Package: scWGCNA
Title: What the Package Does (One Line, Title Case)
Title: WGCNA
Version: 0.0.0.9000
Authors@R: 
    person(given = "First",
           family = "Last",
           role = c("aut", "cre"),
           email = "first.last@example.com",
           comment = c(ORCID = "YOUR-ORCID-ID"))
Authors@R: c(
    person("Samuel", "Morabito", , "smorabit@uci.edu", role = c("aut", "cre"),
           comment = c(ORCID = "0000-0002-7768-4856"))
  )
Description: What the package does (one paragraph).
License: `use_mit_license()`, `use_gpl3_license()` or friends to
    pick a license
+38 −1
Original line number Diff line number Diff line
@@ -759,6 +759,27 @@ ResetModuleNames <- function(
    seurat_obj <- SetROCData(seurat_obj, roc_data, wgcna_name)
  }

  # update motif overlap
  overlap_df <- GetMotifOverlap(seurat_obj, wgcna_name)
  if(!is.null(overlap_df)){
    overlap_df$module <- factor(
      new_mod_df[match(overlap_df$module, new_mod_df$old),'new'],
      levels = as.character(new_mod_df$new)
    )
    seurat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)
  }

  # update module umap:
  umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
  if(!is.null(umap_df)){
    umap_df$module <- factor(
      new_mod_df[match(umap_df$module, new_mod_df$old),'new'],
      levels = as.character(new_mod_df$new)
    )
    seurat_obj <- SetModuleUMAP(seurat_obj, umap_df, wgcna_name)
  }


  seurat_obj

}
@@ -778,7 +799,7 @@ ResetModuleColors <- function(

  # get modules
  modules <- GetModules(seurat_obj, wgcna_name)
  mod_colors <- select(modules, c(module, color)) %>%
  mod_colors <- dplyr::select(modules, c(module, color)) %>%
    distinct %>% arrange(module) %>% .$color
  grey_ind <- which(mod_colors == 'grey')

@@ -801,6 +822,22 @@ ResetModuleColors <- function(
  # set module table
  seurat_obj <- SetModules(seurat_obj, modules, wgcna_name)


  # update motif overlap
  overlap_df <- GetMotifOverlap(seurat_obj, wgcna_name)
  if(!is.null(overlap_df)){
    overlap_df$color <- new_color_df[match(overlap_df$color, new_color_df$old),'new']
    seurat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)
  }

  # update module umap:
  umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
  if(!is.null(umap_df)){
    umap_df$module <- new_color_df[match(umap_df$color, new_color_df$old),'new']
    seurat_obj <- SetModuleUMAP(seurat_obj, umap_df, wgcna_name)
  }


  seurat_obj

}
+63 −31
Original line number Diff line number Diff line
@@ -813,7 +813,7 @@ HubGeneNetworkPlot <- function(
    if(col1 == col2){
      col = col1
    } else{
      col = 'gray90'
      col = 'grey90'
    }
    col
  })
@@ -885,12 +885,16 @@ HubGeneNetworkPlot <- function(
#' ModuleUMAPPlot
ModuleUMAPPlot <- function(
  seurat_obj,
  sample_edges = TRUE, # TRUE if we sample edges randomly, FALSE if we take the top edges
  edge_prop = 0.2,
  label_hubs = 5, # how many hub genes to label?
  edge.alpha=0.25,
  vertex.label.cex=0.5, hub.vertex.size=4,
  other.vertex.size=1, repulse.exp=3,
  harmonized=TRUE,
  wgcna_name=NULL,
  vertex.label.cex=0.5,
  hub.vertex.size=4,
  other.vertex.size=1,
  repulse.exp=3,
  return_graph = FALSE, # this returns the igraph object instead of plotting
  wgcna_name=NULL,
  ...
){

@@ -899,8 +903,7 @@ ModuleUMAPPlot <- function(
  # get the TOM
  TOM <- GetTOM(seurat_obj, wgcna_name)

  # get modules, MEs:
  MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
  # get modules,
  modules <- GetModules(seurat_obj, wgcna_name)

  # get the UMAP df:
@@ -915,19 +918,18 @@ ModuleUMAPPlot <- function(
  subset_TOM <- TOM[umap_df$gene, umap_df$gene[umap_df$hub == 'hub']]

  # labels
  selected_modules$label <- ifelse(selected_modules$hub == 'hub', as.character(selected_modules$gene_name), '')
  label_genes <- selected_modules %>% group_by(module) %>% top_n(label_hubs, wt=kME) %>% .$gene_name
  selected_modules$label <- ifelse(selected_modules$gene_name %in% label_genes, selected_modules$gene_name, '')
  selected_modules$fontcolor <- ifelse(selected_modules$color == 'black', 'gray50', 'black')

  # set frome color
  # same color as module for all genes, black outline for the selected hub genes
  selected_modules$framecolor <- ifelse(selected_modules$gene_name %in% label_genes, 'black', selected_modules$color)

  # make sure all nodes have at least one edge!!
  edge_cutoff <- min(sapply(1:nrow(subset_TOM), function(i){max(subset_TOM[i,])}))
  edge_df <- subset_TOM %>% melt %>% subset(value >= edge_cutoff)
  # melt TOM into long format
  edge_df <- subset_TOM %>% melt
  print(dim(edge_df))

  # scale edge values between 0 and 1
  edge_df$value <- scale01(edge_df$value)


  # set color of each edge based on value:
  edge_df$color <- future.apply::future_sapply(1:nrow(edge_df), function(i){
    gene1 = as.character(edge_df[i,'Var1'])
@@ -939,11 +941,38 @@ ModuleUMAPPlot <- function(
    if(col1 == col2){
      col = col1
    } else{
      col = 'gray90'
      col = 'grey90'
    }
    col
  })

  # subset edges:
  groups <- unique(edge_df$color)
  print(groups)
  if(sample_edges){
    print('here')
    # randomly sample
    temp <- do.call(rbind, lapply(groups, function(cur_group){
      cur_df <- edge_df %>% subset(color == cur_group)
      n_edges <- nrow(cur_df)
      cur_sample <- sample(1:n_edges, round(n_edges * edge_prop))
      cur_df[cur_sample,]
    }))
  } else{

    # get top strongest edges
    temp <- do.call(rbind, lapply(groups, function(cur_group){
      cur_df <- edge_df %>% subset(color == cur_group)
      n_edges <- nrow(cur_df)
      cur_df %>% dplyr::top_n(round(n_edges * edge_prop), wt=value)
    }))
  }

  edge_df <- temp
  print(dim(edge_df))

  # scale edge values between 0 and 1 for each module
  edge_df <- edge_df %>% group_by(color) %>% mutate(value=scale01(value))

  # edges & vertices are plotted in igraph starting with the first row, so re-order s.t. strong edges are on bottom, all gray on the top of the table:
  edge_df <- edge_df %>% arrange(value)
@@ -953,13 +982,12 @@ ModuleUMAPPlot <- function(
  )
  head(edge_df)


  # set alpha of edges
  edge_df$color <- sapply(1:nrow(edge_df), function(i){
    a = edge_df$value[i]
    #if(edge_df$value[i] < 0.5){a=0.5}
    alpha(edge_df$color[i], alpha=a)
  })
  # set alpha of edges based on kME
  edge_df$color_alpha <- ifelse(
    edge_df$color == 'grey90',
    alpha(edge_df$color, alpha=edge_df$value/2),
    alpha(edge_df$color, alpha=edge_df$value)
  )

  # re-order vertices so hubs are plotted on top
  selected_modules <- rbind(
@@ -967,6 +995,12 @@ ModuleUMAPPlot <- function(
    subset(selected_modules , hub != 'other')
  )

  # re-order vertices so labeled genes are on top
  selected_modules <- rbind(
    subset(selected_modules , label == ''),
    subset(selected_modules , label != '')
  )

  # setup igraph:
  g <- igraph::graph_from_data_frame(
    edge_df,
@@ -979,22 +1013,20 @@ ModuleUMAPPlot <- function(
  plot(
    g,
    layout=  as.matrix(selected_modules[,c('UMAP1', 'UMAP2')]),
    edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
    # edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
    edge.color=adjustcolor(E(g)$color_alpha, alpha.f=edge.alpha),
    vertex.size=V(g)$kME * 3,
    edge.curved=0,
    edge.width=0.5,
    vertex.color=V(g)$color,
    # vertex.frame.color=V(g)$color,
  #  vertex.label=V(g)$label,
    vertex.label="",
    vertex.label=V(g)$label,
    vertex.label.dist=1.1,
    vertex.label.degree=-pi/4,
    vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
    vertex.label.font = 3,
    vertex.label.color = V(g)$fontcolor,
    vertex.label.cex=0,
    vertex.frame.color=ifelse(
      V(g)$geneset == 'hub',
      'black', V(g)$color
    )
    vertex.frame.color=V(g)$framecolor
  )

}
+14 −23
Original line number Diff line number Diff line
# scWGCNA

scWGCNA is a full-featured bioinformatics package based on [Seurat](https://satijalab.org/seurat/index.html) and [WGCNA](https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/) to perform co-expression network analysis in single-cell or single-nucleus RNA-seq datasets.
WGCNA was originally built for the analysis of bulk gene expression datasets, and the performance of
vanilla WGCNA on single-cell data is limited due to the inherent sparsity of scRNA-seq data. To account for this,
scWGCNA has a function to aggregate transcriptionally similar cells into pseudo-bulk ***metacells*** before
running the WGCNA pipeline, greatly reducing the sparsity of the dataset while preserving cellular heterogeneity. Furthermore, WGCNA is a well established tool with many different options and parameters,
so we recommend trying different options in network construction that are best suited to your dataset.
# scWGCNA <img src="man/figures/logo_v2.png" align="right" height="20%" width="20%" />

## Prerequisites
scWGCNA is an R package for performing [weighted gene co-expression network analysis (WGCNA)](https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/) in single-cell
RNA-seq data. scWGCNA constructs co-expression networks in a cell-type-specific manner,
identifies robust modules of highly inerconnected genes, and provides biological
context for these modules. For ease of use, scWGCNA is directly compatible with
[Seurat](https://satijalab.org/seurat/index.html) objects, one of the most common
formats for single-cell data.

To run scWGCNA, you first need to have a single-cell transcriptomic dataset in Seurat format with
clustering and dimensionality reduction already computed. If this all sounds like gibberish to you,
I would recommend first looking at the [Seurat guided clustering tutorial](https://satijalab.org/seurat/articles/pbmc3k_tutorial.html).

## Software requirements
## Installation

scWGCNA has been tested only on R 3.6 and 4.0 on Mac OS and Linux environments (sorry to all Windows bioinformaticians, if there are any of you out there). To run scWGCNA, there are a few other R packages that you need to install. Open up a R session and enter the following commands:
We recommend creating an R [conda environment](https://docs.conda.io/en/latest/)
environment for scWGCNA.

```
conda create -n scWGCNA -c conda-forge r-base r-essentials
```

```
install.packages('WGCNA')
@@ -33,13 +34,3 @@ Now you can install the scWGCNA package using `devtools`:
```
devtools::install_github('smorabit/scWGCNA')
```

## Citation

If scWGCNA is useful in your research, please consider citing our publication.

* [Single-nucleus chromatin accessibility and transcriptomic characterization of Alzheimer's disease](https://www.nature.com/articles/s41588-021-00894-z)

# scWGCNA tutorial

This section is under construction since I have totally changed how scWGCNA works!
+6 −0
Original line number Diff line number Diff line
@@ -14,6 +14,12 @@ authors:
  Samuel Morabito:
    href: https://smorabit.github.io

articles:
  - title: scWGCNA Basics
    navbar: ~
    contents:
      - basic_tutorial

reference:
  - title: Metacells
    desc: Functions for constructing metacells from single-cell data
Loading