update plot functions (9ab4a351) · Commits · github_fork / DAseq2

NAMESPACE

+5 −0

Original line number	Diff line number	Diff line
		# Generated by roxygen2: do not edit by hand

		export(STGlocalMarkers)
		export(STGmarkerFinder)
		export(SeuratLocalMarkers)
		export(SeuratMarkerFinder)
		export(addDAslot)
		export(getDAcells)
		export(getDAregion)
		export(plotCellLabel)
		@@ -16,3 +20,4 @@ import(glmnet)
		import(reticulate)
		import(scales)
		importFrom(caret,createFolds)
		importFrom(e1071,sigmoid)

R/STGmarkerFinder.R

+69 −5

Original line number	Diff line number	Diff line
		@@ -10,7 +10,7 @@
		#' @param da.regions.to.run numeric (vector), which DA regions to run the marker finder,
		#' default is to run all regions
		#' @param lambda numeric, regularization parameter that weights the number of selected genes,
		#' a larger lambda leads to fewer genes, default 1.2
		#' a larger lambda leads to fewer genes, default 1.5
		#' @param n.runs integer, number of runs to run the model, default 5
		#' @param python.use character string, the Python to use, default "/usr/bin/python"
		# @param source.code character string, the neural network source code, default "./STG_model.py"
		@@ -18,6 +18,7 @@
		#' @param GPU which GPU to use, default '', using CPU
		#'
		#' @import reticulate
		#' @importFrom e1071 sigmoid
		#'
		#' @return a list of results:
		#' \describe{
		@@ -36,7 +37,7 @@
		STGmarkerFinder <- function(
		X, da.regions,
		da.regions.to.run = NULL,
		lambda = 1.2, n.runs = 5, return.model = F,
		lambda = 1.5, n.runs = 5, return.model = T,
		python.use = "/usr/bin/python", GPU = ""
		){
		if(!inherits(X, what = "matrix") & !inherits(X, what = "Matrix")){
		@@ -82,7 +83,7 @@ STGmarkerFinder <- function(
		da.model[[as.character(ii)]][["model"]] <- stg.out[[3]]
		da.model[[as.character(ii)]][["features"]] <- rownames(X)[stg.out[[4]] + 1]
		da.model[[as.character(ii)]][["selected.features"]] <- rownames(X)[stg.out[[1]][[n.runs]] + 1]
		da.model[[as.character(ii)]][["pred"]] <- stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1]
		da.model[[as.character(ii)]][["pred"]] <- e1071::sigmoid(stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1])
		da.model[[as.character(ii)]][["alpha"]] <- as.numeric(stg.out[[6]])
		names(da.model[[as.character(ii)]][["alpha"]]) <- da.model[[as.character(ii)]][["features"]]
		}
		@@ -126,6 +127,58 @@ STGmarkerFinder <- function(



		#' STG local markers
		#' Run STG to find a set of genes that separate a given DA region from a local subset of cells.
		#'
		#' @param X matrix, normalized expression matrix of all cells in the dataset, genes are in rows,
		#' rownames must be gene names
		#' @param da.regions output from the function getDAregion()
		#' @param da.region.to.run numeric, which (single) DA region to find local markers for
		#' @param cell.label.info vector, cell labeling information to select the local subset of cells to compare
		#' with input DA region
		#' @param cell.label.to.run cell label(s) to select from cell.label.info that represent
		#' the local neiborhood for the input DA region
		#' @param lambda numeric, regularization parameter that weights the number of selected genes,
		#' a larger lambda leads to fewer genes, default 1.5
		#' @param n.runs integer, number of runs to run the model, default 5
		#' @param python.use character string, the Python to use, default "/usr/bin/python"
		#' @param return.model a logical value to indicate whether to return the actual model of STG
		#' @param GPU which GPU to use, default '', using CPU
		#'
		#' @return a list of results:
		#' \describe{
		#' \item{markers}{a list of data.frame with markers for each DA region}
		#' \item{accuracy}{a numeric vector showing mean accuracy for each DA region}
		#' \item{model}{a list of model for each DA region, each model contains:
		#' \describe{\item{model}{the model of STG of the final run}
		#' \item{features}{features used to train the model}
		#' \item{selected.features}{the selected features of the final run}
		#' \item{pred}{the linear prediction value for each cell from the model}}
		#' }
		#' }
		#'
		#' @export
		#'
		STGlocalMarkers <- function(
		X, da.regions, da.region.to.run, cell.label.info, cell.label.to.run, ...
		){
		n.cells <- ncol(X)
		if(length(cell.label.info) != n.cells){
		stop("cell.label.info must have the same length as ncol(X).")
		}

		label.da <- rep("notused", n.cells)
		label.da[cell.label.info %in% cell.label.to.run] <- "local"
		label.da[da.regions$da.region.label == da.region.to.run] <- "da"

		output.markers <- runSTG(
		X = X, X.labels = label.da, label.1 = "da", label.2 = "local", ...
		)
		return(output.markers)
		}



		#' Run STG
		#'
		#' Run STG to select a set of genes that separate cells with label.1 from label.2 (other labels)
		@@ -150,6 +203,7 @@ STGmarkerFinder <- function(
		#' \item{accuracy}{a numeric vector showing mean accuracy for each DA region}
		#' \item{model}{a list of model for each DA region, each model contains:
		#' \describe{\item{model}{the model of STG of the final run}
		#' \item{cells}{cell names/indices used to train the model}
		#' \item{features}{features used to train the model}
		#' \item{selected.features}{the selected features of the final run}
		#' \item{pred}{the linear prediction value for each cell from the model}}
		@@ -160,12 +214,16 @@ STGmarkerFinder <- function(
		#'
		runSTG <- function(
		X, X.labels, label.1, label.2 = NULL,
		lambda = 1.5, n.runs = 5, return.model = F,
		lambda = 1.5, n.runs = 5, return.model = T,
		python.use = "/usr/bin/python", GPU = ""
		){
		if(!inherits(X, what = "matrix") & !inherits(X, what = "Matrix")){
		X <- as.matrix(X)
		}
		if(is.null(rownames(X))){
		stop("rownames of X must be gene names.")
		}

		# set Python
		use_python(python = python.use, required = T)
		source_python(file = paste(system.file(package="DAseq"), "DA_STG.py", sep = "/"))
		@@ -175,6 +233,11 @@ runSTG <- function(
		X.use <- which(X.labels %in% c(label.1,label.2))
		X <- X[,X.use]
		X.labels <- X.labels[X.use]
		} else {
		X.use <- c(1:ncol(X))
		}
		if(!is.null(colnames(X))){
		X.use <- colnames(X)[X.use]
		}

		X.py <- r_to_py(as.matrix(X))
		@@ -189,9 +252,10 @@ runSTG <- function(
		da.accr <- mean(stg.out[[2]])
		da.model <- list()
		da.model[["model"]] <- stg.out[[3]]
		da.model[["cells"]] <- X.use
		da.model[["features"]] <- rownames(X)[stg.out[[4]] + 1]
		da.model[["selected.features"]] <- rownames(X)[stg.out[[1]][[n.runs]] + 1]
		da.model[["pred"]] <- stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1]
		da.model[["pred"]] <- e1071::sigmoid(stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1])

		da.markers.logfc <- sapply(da.markers, function(x, x.data1, x.data2){
		log2(mean(x.data1[x,] + 1/100000) / mean(x.data2[x,] + 1/100000))

R/SeuratMarkerFinder.R

0 → 100644

+108 −0

Original line number	Diff line number	Diff line
		#' DA-seq Step 4: Seurat marker finder to characterize DA regions
		#'
		#' Use Seurat FindMarkers() function to identify characteristic genes for DA regions
		#'
		#' @param object input Seurat object
		#' @param da.slot character, variable name that represents DA regions in Seurat meta.data, default "da"
		#' @param da.regions.to.run numeric (vector), which DA regions to find markers for,
		#' default is to run all regions
		#' @param ... parameters passed to Seurat FindMarkers() function
		#'
		#' @import Seurat
		#'
		#' @return a list of markers for DA regions with statistics
		#'
		#' @export
		#'
		SeuratMarkerFinder <- function(
		object, da.slot = "da", da.regions.to.run = NULL, ...
		){
		# get DA regions to run
		n.da <- length(unique(object@meta.data[,da.slot])) - 1
		if(is.null(da.regions.to.run)){
		da.regions.to.run <- c(1:n.da)
		}

		seurat.version <- substr(packageVersion("Seurat"),1,1)
		if(seurat.version == "3"){
		Idents(object) <- object@meta.data[,da.slot]

		output.markers <- list()
		for(ii in da.regions.to.run){
		output.markers[[as.character(ii)]] <- FindMarkers(
		object, ident.1 = ii, ...
		)
		}
		}

		return(output.markers)
		}



		#' Add DA slot
		#'
		#' Add DA region information to the meta.data of a Seurat object
		#'
		#' @param object input Seurat object
		#' @param da.regions output from function getDAregion()
		#' @param da.slot character, variable name to put in Seurat meta.data, default "da"
		#' @param set.ident a logical value to indicate whether to set Idents of the Seurat object to DA information,
		#' default False
		#'
		#' @import Seurat
		#'
		#' @return updated Seurat object
		#'
		#' @export
		#'
		addDAslot <- function(object, da.regions, da.slot = "da", set.ident = F){
		seurat.version <- substr(packageVersion("Seurat"),1,1)
		if(seurat.version == "3"){
		object@meta.data[,da.slot] <- da.regions$da.region.label
		if(set.ident){
		Idents(object) <- object@meta.data[,da.slot]
		}
		}

		return(object)
		}



		#' Find local markers
		#'
		#' Use Seurat FindMarkers() function to identify genes that distinguish a DA region from its local neighborhood
		#'
		#' @param object input Seurat object
		#' @param da.slot character, variable name that represents DA regions in Seurat meta.data, default "da"
		#' @param da.region.to.run numeric, which (single) DA region to find local markers for
		#' @param cell.label.slot character, variable name that represents cell labeling information in Seurat meta.data
		#' to combine with DA information
		#' @param cell.label.to.run cell label(s) that represent the local neiborhood for the input DA region
		#' @param ... parameters passed to Seurat FindMarkers() function
		#'
		#' @import Seurat
		#'
		#' @return a data.frame of markers and statistics
		#'
		#' @export
		#'
		SeuratLocalMarkers <- function(
		object, da.slot = "da", da.region.to.run,
		cell.label.slot, cell.label.to.run, ...
		){
		seurat.version <- substr(packageVersion("Seurat"),1,1)
		if(seurat.version == "3"){
		object@meta.data$"label_da" <- "notused"
		object@meta.data$"label_da"[object@meta.data[,cell.label.slot] %in% cell.label.to.run] <- "local"
		object@meta.data$"label_da"[object@meta.data[,da.slot] == da.region.to.run] <- "da"
		Idents(object) <- object@meta.data$"label_da"
		output.markers <- FindMarkers(
		object, ident.1 = "da", ident.2 = "local", ...
		)
		}
		return(output.markers)
		}

R/getDAcells.R

+2 −1

Original line number	Diff line number	Diff line
		@@ -10,6 +10,7 @@
		#' @param labels.1 character vector, label name(s) that represent condition 1
		#' @param labels.2 character vector, label name(s) that represent condition 2
		#' @param k.vector vector, k values to create the score vector
		#' @param save.knn a logical value to indicate whether to save computed kNN result, default False
		#' @param alpha numeric, elasticnet mixing parameter passed to glmnet(), default 0 (Ridge)
		#' @param k.folds integer, number of data splits used in the neural network, default 10
		#' @param n.runs integer, number of times to run the neural network to get the predictions, default 10
		@@ -27,7 +28,7 @@
		#' @return a list of results
		#' \describe{
		#' \item{da.ratio}{score vector for each cell}
		#' \item{da.pred}{(mean) prediction from the neural network}
		#' \item{da.pred}{(mean) prediction from the logistic regression}
		#' \item{da.up}{index for DA cells more abundant in condition of labels.2}
		#' \item{da.down}{index for DA cells more abundant in condition of labels.1}
		#' \item{pred.plot}{ggplot object showing the predictions of logistic regression on plot.embedding}

R/plots.R

+9 −6

Original line number	Diff line number	Diff line
		@@ -11,18 +11,19 @@ NULL
		#' @param score numeric vector, a single value to color each cell, continuous
		#' @param cell.col string vector, color bar to use for "score", defaul c("blue","white","red")
		#' @param size numeric, dot size for each cell, default 0.5
		#' @param alpha numeric between 0 to 1, dot opacity for each cell, default 1
		#'
		#' @return a ggplot object
		#' @export
		#'
		plotCellScore <- function(X, score, cell.col = c("blue","white","red"), size = 0.5){
		plotCellScore <- function(X, score, cell.col = c("blue","white","red"), size = 0.5, alpha = 1){
		# Add colnames for X
		colnames(X) <- c("Dim1","Dim2")

		# Plot cells with labels
		myggplot <- ggplot() + theme_cowplot() +
		geom_point(data = data.frame(Dim1 = X[,1], Dim2 = X[,2], Score = score),
		aes(x = Dim1, y = Dim2, col = Score), size = size) +
		aes(x = Dim1, y = Dim2, col = Score), size = size, alpha = alpha) +
		scale_color_gradientn(colours = cell.col)

		return(myggplot)
		@@ -73,20 +74,22 @@ plotDAsite <- function(X, site.list, size = 0.5, cols = NULL){
		#' @param label vector, label for each cell
		#' @param cell.col string vector, color bar to use for cell labels, default ggplot default
		#' @param size numeric, dot size for each cell, default 0.5
		#' @param alpha numeric between 0 to 1, dot opacity for each cell, default 1
		#' @param do.label a logical value indicating whether to add text to mark each cell label
		#' @param label.size numeric, size of text labels, default 4
		#'
		#' @return a ggplot object
		#' @export
		#'
		plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, do.label = T){
		plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, alpha = 1, do.label = T, label.size = 4){
		# Add colnames for X
		colnames(X) <- c("Dim1","Dim2")

		# Plot cells with labels
		myggplot <- ggplot() + theme_cowplot() +
		geom_point(data = data.frame(Dim1 = X[,1], Dim2 = X[,2], Group = label, stringsAsFactors = F),
		aes(x = Dim1, y = Dim2, col = Group), size = size) +
		guides(colour = guide_legend(override.aes = list(size=3), title = NULL))
		aes(x = Dim1, y = Dim2, col = Group), size = size, alpha = alpha) +
		guides(colour = guide_legend(override.aes = list(size=3,alpha=1), title = NULL))

		# Change cell color
		if(!is.null(cell.col)){
		@@ -100,7 +103,7 @@ plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, do.label = T){
		labeldim2 <- by(X[,2], INDICES = label, FUN = median)[mylabels]

		myggplot <- myggplot +
		annotate("text", x = labeldim1, y = labeldim2, label = as.character(mylabels))
		annotate("text", x = labeldim1, y = labeldim2, label = as.character(mylabels), size = label.size)
		}

		return(myggplot)

Admin message