finished module umap functions (51fcfaac) · Commits · github_fork / HdWGCNA

DESCRIPTION

+5 −7

Original line number	Diff line number	Diff line
		Package: scWGCNA
		Title: What the Package Does (One Line, Title Case)
		Title: WGCNA
		Version: 0.0.0.9000
		Authors@R:
		person(given = "First",
		family = "Last",
		role = c("aut", "cre"),
		email = "first.last@example.com",
		comment = c(ORCID = "YOUR-ORCID-ID"))
		Authors@R: c(
		person("Samuel", "Morabito", , "smorabit@uci.edu", role = c("aut", "cre"),
		comment = c(ORCID = "0000-0002-7768-4856"))
		)
		Description: What the package does (one paragraph).
		License: `use_mit_license()`, `use_gpl3_license()` or friends to
		pick a license

R/getters_and_setters.R

+38 −1

Original line number	Diff line number	Diff line
		@@ -759,6 +759,27 @@ ResetModuleNames <- function(
		seurat_obj <- SetROCData(seurat_obj, roc_data, wgcna_name)
		}

		# update motif overlap
		overlap_df <- GetMotifOverlap(seurat_obj, wgcna_name)
		if(!is.null(overlap_df)){
		overlap_df$module <- factor(
		new_mod_df[match(overlap_df$module, new_mod_df$old),'new'],
		levels = as.character(new_mod_df$new)
		)
		seurat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)
		}

		# update module umap:
		umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
		if(!is.null(umap_df)){
		umap_df$module <- factor(
		new_mod_df[match(umap_df$module, new_mod_df$old),'new'],
		levels = as.character(new_mod_df$new)
		)
		seurat_obj <- SetModuleUMAP(seurat_obj, umap_df, wgcna_name)
		}


		seurat_obj

		}
		@@ -778,7 +799,7 @@ ResetModuleColors <- function(

		# get modules
		modules <- GetModules(seurat_obj, wgcna_name)
		mod_colors <- select(modules, c(module, color)) %>%
		mod_colors <- dplyr::select(modules, c(module, color)) %>%
		distinct %>% arrange(module) %>% .$color
		grey_ind <- which(mod_colors == 'grey')

		@@ -801,6 +822,22 @@ ResetModuleColors <- function(
		# set module table
		seurat_obj <- SetModules(seurat_obj, modules, wgcna_name)


		# update motif overlap
		overlap_df <- GetMotifOverlap(seurat_obj, wgcna_name)
		if(!is.null(overlap_df)){
		overlap_df$color <- new_color_df[match(overlap_df$color, new_color_df$old),'new']
		seurat_obj <- SetMotifOverlap(seurat_obj, overlap_df, wgcna_name)
		}

		# update module umap:
		umap_df <- GetModuleUMAP(seurat_obj, wgcna_name)
		if(!is.null(umap_df)){
		umap_df$module <- new_color_df[match(umap_df$color, new_color_df$old),'new']
		seurat_obj <- SetModuleUMAP(seurat_obj, umap_df, wgcna_name)
		}


		seurat_obj

		}

R/plotting.R

+63 −31

Original line number	Diff line number	Diff line
		@@ -813,7 +813,7 @@ HubGeneNetworkPlot <- function(
		if(col1 == col2){
		col = col1
		} else{
		col = 'gray90'
		col = 'grey90'
		}
		col
		})
		@@ -885,12 +885,16 @@ HubGeneNetworkPlot <- function(
		#' ModuleUMAPPlot
		ModuleUMAPPlot <- function(
		seurat_obj,
		sample_edges = TRUE, # TRUE if we sample edges randomly, FALSE if we take the top edges
		edge_prop = 0.2,
		label_hubs = 5, # how many hub genes to label?
		edge.alpha=0.25,
		vertex.label.cex=0.5, hub.vertex.size=4,
		other.vertex.size=1, repulse.exp=3,
		harmonized=TRUE,
		wgcna_name=NULL,
		vertex.label.cex=0.5,
		hub.vertex.size=4,
		other.vertex.size=1,
		repulse.exp=3,
		return_graph = FALSE, # this returns the igraph object instead of plotting
		wgcna_name=NULL,
		...
		){

		@@ -899,8 +903,7 @@ ModuleUMAPPlot <- function(
		# get the TOM
		TOM <- GetTOM(seurat_obj, wgcna_name)

		# get modules, MEs:
		MEs <- GetMEs(seurat_obj, harmonized, wgcna_name)
		# get modules,
		modules <- GetModules(seurat_obj, wgcna_name)

		# get the UMAP df:
		@@ -915,19 +918,18 @@ ModuleUMAPPlot <- function(
		subset_TOM <- TOM[umap_df$gene, umap_df$gene[umap_df$hub == 'hub']]

		# labels
		selected_modules$label <- ifelse(selected_modules$hub == 'hub', as.character(selected_modules$gene_name), '')
		label_genes <- selected_modules %>% group_by(module) %>% top_n(label_hubs, wt=kME) %>% .$gene_name
		selected_modules$label <- ifelse(selected_modules$gene_name %in% label_genes, selected_modules$gene_name, '')
		selected_modules$fontcolor <- ifelse(selected_modules$color == 'black', 'gray50', 'black')

		# set frome color
		# same color as module for all genes, black outline for the selected hub genes
		selected_modules$framecolor <- ifelse(selected_modules$gene_name %in% label_genes, 'black', selected_modules$color)

		# make sure all nodes have at least one edge!!
		edge_cutoff <- min(sapply(1:nrow(subset_TOM), function(i){max(subset_TOM[i,])}))
		edge_df <- subset_TOM %>% melt %>% subset(value >= edge_cutoff)
		# melt TOM into long format
		edge_df <- subset_TOM %>% melt
		print(dim(edge_df))

		# scale edge values between 0 and 1
		edge_df$value <- scale01(edge_df$value)


		# set color of each edge based on value:
		edge_df$color <- future.apply::future_sapply(1:nrow(edge_df), function(i){
		gene1 = as.character(edge_df[i,'Var1'])
		@@ -939,11 +941,38 @@ ModuleUMAPPlot <- function(
		if(col1 == col2){
		col = col1
		} else{
		col = 'gray90'
		col = 'grey90'
		}
		col
		})

		# subset edges:
		groups <- unique(edge_df$color)
		print(groups)
		if(sample_edges){
		print('here')
		# randomly sample
		temp <- do.call(rbind, lapply(groups, function(cur_group){
		cur_df <- edge_df %>% subset(color == cur_group)
		n_edges <- nrow(cur_df)
		cur_sample <- sample(1:n_edges, round(n_edges * edge_prop))
		cur_df[cur_sample,]
		}))
		} else{

		# get top strongest edges
		temp <- do.call(rbind, lapply(groups, function(cur_group){
		cur_df <- edge_df %>% subset(color == cur_group)
		n_edges <- nrow(cur_df)
		cur_df %>% dplyr::top_n(round(n_edges * edge_prop), wt=value)
		}))
		}

		edge_df <- temp
		print(dim(edge_df))

		# scale edge values between 0 and 1 for each module
		edge_df <- edge_df %>% group_by(color) %>% mutate(value=scale01(value))

		# edges & vertices are plotted in igraph starting with the first row, so re-order s.t. strong edges are on bottom, all gray on the top of the table:
		edge_df <- edge_df %>% arrange(value)
		@@ -953,13 +982,12 @@ ModuleUMAPPlot <- function(
		)
		head(edge_df)


		# set alpha of edges
		edge_df$color <- sapply(1:nrow(edge_df), function(i){
		a = edge_df$value[i]
		#if(edge_df$value[i] < 0.5){a=0.5}
		alpha(edge_df$color[i], alpha=a)
		})
		# set alpha of edges based on kME
		edge_df$color_alpha <- ifelse(
		edge_df$color == 'grey90',
		alpha(edge_df$color, alpha=edge_df$value/2),
		alpha(edge_df$color, alpha=edge_df$value)
		)

		# re-order vertices so hubs are plotted on top
		selected_modules <- rbind(
		@@ -967,6 +995,12 @@ ModuleUMAPPlot <- function(
		subset(selected_modules , hub != 'other')
		)

		# re-order vertices so labeled genes are on top
		selected_modules <- rbind(
		subset(selected_modules , label == ''),
		subset(selected_modules , label != '')
		)

		# setup igraph:
		g <- igraph::graph_from_data_frame(
		edge_df,
		@@ -979,22 +1013,20 @@ ModuleUMAPPlot <- function(
		plot(
		g,
		layout= as.matrix(selected_modules[,c('UMAP1', 'UMAP2')]),
		edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
		# edge.color=adjustcolor(E(g)$color, alpha.f=edge.alpha),
		edge.color=adjustcolor(E(g)$color_alpha, alpha.f=edge.alpha),
		vertex.size=V(g)$kME * 3,
		edge.curved=0,
		edge.width=0.5,
		vertex.color=V(g)$color,
		# vertex.frame.color=V(g)$color,
		# vertex.label=V(g)$label,
		vertex.label="",
		vertex.label=V(g)$label,
		vertex.label.dist=1.1,
		vertex.label.degree=-pi/4,
		vertex.label.family='Helvetica', #vertex.label.font=vertex_df$font,
		vertex.label.font = 3,
		vertex.label.color = V(g)$fontcolor,
		vertex.label.cex=0,
		vertex.frame.color=ifelse(
		V(g)$geneset == 'hub',
		'black', V(g)$color
		)
		vertex.frame.color=V(g)$framecolor
		)

		}

README.md

+14 −23

Original line number	Diff line number	Diff line
		# scWGCNA

		scWGCNA is a full-featured bioinformatics package based on [Seurat](https://satijalab.org/seurat/index.html) and [WGCNA](https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/) to perform co-expression network analysis in single-cell or single-nucleus RNA-seq datasets.
		WGCNA was originally built for the analysis of bulk gene expression datasets, and the performance of
		vanilla WGCNA on single-cell data is limited due to the inherent sparsity of scRNA-seq data. To account for this,
		scWGCNA has a function to aggregate transcriptionally similar cells into pseudo-bulk *metacells* before
		running the WGCNA pipeline, greatly reducing the sparsity of the dataset while preserving cellular heterogeneity. Furthermore, WGCNA is a well established tool with many different options and parameters,
		so we recommend trying different options in network construction that are best suited to your dataset.
		# scWGCNA <img src="man/figures/logo_v2.png" align="right" height="20%" width="20%" />

		## Prerequisites
		scWGCNA is an R package for performing [weighted gene co-expression network analysis (WGCNA)](https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/) in single-cell
		RNA-seq data. scWGCNA constructs co-expression networks in a cell-type-specific manner,
		identifies robust modules of highly inerconnected genes, and provides biological
		context for these modules. For ease of use, scWGCNA is directly compatible with
		[Seurat](https://satijalab.org/seurat/index.html) objects, one of the most common
		formats for single-cell data.

		To run scWGCNA, you first need to have a single-cell transcriptomic dataset in Seurat format with
		clustering and dimensionality reduction already computed. If this all sounds like gibberish to you,
		I would recommend first looking at the [Seurat guided clustering tutorial](https://satijalab.org/seurat/articles/pbmc3k_tutorial.html).

		## Software requirements
		## Installation

		scWGCNA has been tested only on R 3.6 and 4.0 on Mac OS and Linux environments (sorry to all Windows bioinformaticians, if there are any of you out there). To run scWGCNA, there are a few other R packages that you need to install. Open up a R session and enter the following commands:
		We recommend creating an R [conda environment](https://docs.conda.io/en/latest/)
		environment for scWGCNA.

		```
		conda create -n scWGCNA -c conda-forge r-base r-essentials
		```

		```
		install.packages('WGCNA')
		@@ -33,13 +34,3 @@ Now you can install the scWGCNA package using `devtools`:
		```
		devtools::install_github('smorabit/scWGCNA')
		```

		## Citation

		If scWGCNA is useful in your research, please consider citing our publication.

		* [Single-nucleus chromatin accessibility and transcriptomic characterization of Alzheimer's disease](https://www.nature.com/articles/s41588-021-00894-z)

		# scWGCNA tutorial

		This section is under construction since I have totally changed how scWGCNA works!

_pkgdown.yml

+6 −0

Original line number	Diff line number	Diff line
		@@ -14,6 +14,12 @@ authors:
		Samuel Morabito:
		href: https://smorabit.github.io

		articles:
		- title: scWGCNA Basics
		navbar: ~
		contents:
		- basic_tutorial

		reference:
		- title: Metacells
		desc: Functions for constructing metacells from single-cell data

Admin message