Commit 9ab4a351 authored by Jun Zhao's avatar Jun Zhao
Browse files

update plot functions

parent 55bb9e74
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
# Generated by roxygen2: do not edit by hand

export(STGlocalMarkers)
export(STGmarkerFinder)
export(SeuratLocalMarkers)
export(SeuratMarkerFinder)
export(addDAslot)
export(getDAcells)
export(getDAregion)
export(plotCellLabel)
@@ -16,3 +20,4 @@ import(glmnet)
import(reticulate)
import(scales)
importFrom(caret,createFolds)
importFrom(e1071,sigmoid)
+69 −5
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
#' @param da.regions.to.run numeric (vector), which DA regions to run the marker finder,
#' default is to run all regions
#' @param lambda numeric, regularization parameter that weights the number of selected genes,
#' a larger lambda leads to fewer genes, default 1.2
#' a larger lambda leads to fewer genes, default 1.5
#' @param n.runs integer, number of runs to run the model, default 5
#' @param python.use character string, the Python to use, default "/usr/bin/python"
# @param source.code character string, the neural network source code, default "./STG_model.py"
@@ -18,6 +18,7 @@
#' @param GPU which GPU to use, default '', using CPU
#'
#' @import reticulate
#' @importFrom e1071 sigmoid
#'
#' @return a list of results:
#' \describe{
@@ -36,7 +37,7 @@
STGmarkerFinder <- function(
  X, da.regions,
  da.regions.to.run = NULL,
  lambda = 1.2, n.runs = 5, return.model = F,
  lambda = 1.5, n.runs = 5, return.model = T,
  python.use = "/usr/bin/python", GPU = ""
){
  if(!inherits(X, what = "matrix") & !inherits(X, what = "Matrix")){
@@ -82,7 +83,7 @@ STGmarkerFinder <- function(
    da.model[[as.character(ii)]][["model"]] <- stg.out[[3]]
    da.model[[as.character(ii)]][["features"]] <- rownames(X)[stg.out[[4]] + 1]
    da.model[[as.character(ii)]][["selected.features"]] <- rownames(X)[stg.out[[1]][[n.runs]] + 1]
    da.model[[as.character(ii)]][["pred"]] <- stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1]
    da.model[[as.character(ii)]][["pred"]] <- e1071::sigmoid(stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1])
    da.model[[as.character(ii)]][["alpha"]] <- as.numeric(stg.out[[6]])
    names(da.model[[as.character(ii)]][["alpha"]]) <- da.model[[as.character(ii)]][["features"]]
  }
@@ -126,6 +127,58 @@ STGmarkerFinder <- function(



#' STG local markers
#' Run STG to find a set of genes that separate a given DA region from a local subset of cells.
#'
#' @param X matrix, normalized expression matrix of all cells in the dataset, genes are in rows,
#' rownames must be gene names
#' @param da.regions output from the function getDAregion()
#' @param da.region.to.run numeric, which (single) DA region to find local markers for
#' @param cell.label.info vector, cell labeling information to select the local subset of cells to compare
#' with input DA region
#' @param cell.label.to.run cell label(s) to select from cell.label.info that represent
#' the local neiborhood for the input DA region
#' @param lambda numeric, regularization parameter that weights the number of selected genes,
#' a larger lambda leads to fewer genes, default 1.5
#' @param n.runs integer, number of runs to run the model, default 5
#' @param python.use character string, the Python to use, default "/usr/bin/python"
#' @param return.model a logical value to indicate whether to return the actual model of STG
#' @param GPU which GPU to use, default '', using CPU
#'
#' @return a list of results:
#' \describe{
#'   \item{markers}{a list of data.frame with markers for each DA region}
#'   \item{accuracy}{a numeric vector showing mean accuracy for each DA region}
#'   \item{model}{a list of model for each DA region, each model contains:
#'     \describe{\item{model}{the model of STG of the final run}
#'     \item{features}{features used to train the model}
#'     \item{selected.features}{the selected features of the final run}
#'     \item{pred}{the linear prediction value for each cell from the model}}
#'   }
#' }
#'
#' @export
#'
STGlocalMarkers <- function(
  X, da.regions, da.region.to.run, cell.label.info, cell.label.to.run, ...
){
  n.cells <- ncol(X)
  if(length(cell.label.info) != n.cells){
    stop("cell.label.info must have the same length as ncol(X).")
  }

  label.da <- rep("notused", n.cells)
  label.da[cell.label.info %in% cell.label.to.run] <- "local"
  label.da[da.regions$da.region.label == da.region.to.run] <- "da"

  output.markers <- runSTG(
    X = X, X.labels = label.da, label.1 = "da", label.2 = "local", ...
  )
  return(output.markers)
}



#' Run STG
#'
#' Run STG to select a set of genes that separate cells with label.1 from label.2 (other labels)
@@ -150,6 +203,7 @@ STGmarkerFinder <- function(
#'   \item{accuracy}{a numeric vector showing mean accuracy for each DA region}
#'   \item{model}{a list of model for each DA region, each model contains:
#'     \describe{\item{model}{the model of STG of the final run}
#'     \item{cells}{cell names/indices used to train the model}
#'     \item{features}{features used to train the model}
#'     \item{selected.features}{the selected features of the final run}
#'     \item{pred}{the linear prediction value for each cell from the model}}
@@ -160,12 +214,16 @@ STGmarkerFinder <- function(
#'
runSTG <- function(
  X, X.labels, label.1, label.2 = NULL,
  lambda = 1.5, n.runs = 5, return.model = F,
  lambda = 1.5, n.runs = 5, return.model = T,
  python.use = "/usr/bin/python", GPU = ""
){
  if(!inherits(X, what = "matrix") & !inherits(X, what = "Matrix")){
    X <- as.matrix(X)
  }
  if(is.null(rownames(X))){
    stop("rownames of X must be gene names.")
  }

  # set Python
  use_python(python = python.use, required = T)
  source_python(file = paste(system.file(package="DAseq"), "DA_STG.py", sep = "/"))
@@ -175,6 +233,11 @@ runSTG <- function(
    X.use <- which(X.labels %in% c(label.1,label.2))
    X <- X[,X.use]
    X.labels <- X.labels[X.use]
  } else {
    X.use <- c(1:ncol(X))
  }
  if(!is.null(colnames(X))){
    X.use <- colnames(X)[X.use]
  }

  X.py <- r_to_py(as.matrix(X))
@@ -189,9 +252,10 @@ runSTG <- function(
  da.accr <- mean(stg.out[[2]])
  da.model <- list()
  da.model[["model"]] <- stg.out[[3]]
  da.model[["cells"]] <- X.use
  da.model[["features"]] <- rownames(X)[stg.out[[4]] + 1]
  da.model[["selected.features"]] <- rownames(X)[stg.out[[1]][[n.runs]] + 1]
  da.model[["pred"]] <- stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1]
  da.model[["pred"]] <- e1071::sigmoid(stg.out[[5]][[1]][,2] - stg.out[[5]][[1]][,1])

  da.markers.logfc <- sapply(da.markers, function(x, x.data1, x.data2){
    log2(mean(x.data1[x,] + 1/100000) / mean(x.data2[x,] + 1/100000))

R/SeuratMarkerFinder.R

0 → 100644
+108 −0
Original line number Diff line number Diff line
#' DA-seq Step 4: Seurat marker finder to characterize DA regions
#'
#' Use Seurat FindMarkers() function to identify characteristic genes for DA regions
#'
#' @param object input Seurat object
#' @param da.slot character, variable name that represents DA regions in Seurat meta.data, default "da"
#' @param da.regions.to.run numeric (vector), which DA regions to find markers for,
#' default is to run all regions
#' @param ... parameters passed to Seurat FindMarkers() function
#'
#' @import Seurat
#'
#' @return a list of markers for DA regions with statistics
#'
#' @export
#'
SeuratMarkerFinder <- function(
  object, da.slot = "da", da.regions.to.run = NULL, ...
){
  # get DA regions to run
  n.da <- length(unique(object@meta.data[,da.slot])) - 1
  if(is.null(da.regions.to.run)){
    da.regions.to.run <- c(1:n.da)
  }

  seurat.version <- substr(packageVersion("Seurat"),1,1)
  if(seurat.version == "3"){
    Idents(object) <- object@meta.data[,da.slot]

    output.markers <- list()
    for(ii in da.regions.to.run){
      output.markers[[as.character(ii)]] <- FindMarkers(
        object, ident.1 = ii, ...
      )
    }
  }

  return(output.markers)
}



#' Add DA slot
#'
#' Add DA region information to the meta.data of a Seurat object
#'
#' @param object input Seurat object
#' @param da.regions output from function getDAregion()
#' @param da.slot character, variable name to put in Seurat meta.data, default "da"
#' @param set.ident a logical value to indicate whether to set Idents of the Seurat object to DA information,
#' default False
#'
#' @import Seurat
#'
#' @return updated Seurat object
#'
#' @export
#'
addDAslot <- function(object, da.regions, da.slot = "da", set.ident = F){
  seurat.version <- substr(packageVersion("Seurat"),1,1)
  if(seurat.version == "3"){
    object@meta.data[,da.slot] <- da.regions$da.region.label
    if(set.ident){
      Idents(object) <- object@meta.data[,da.slot]
    }
  }

  return(object)
}



#' Find local markers
#'
#' Use Seurat FindMarkers() function to identify genes that distinguish a DA region from its local neighborhood
#'
#' @param object input Seurat object
#' @param da.slot character, variable name that represents DA regions in Seurat meta.data, default "da"
#' @param da.region.to.run numeric, which (single) DA region to find local markers for
#' @param cell.label.slot character, variable name that represents cell labeling information in Seurat meta.data
#'  to combine with DA information
#' @param cell.label.to.run cell label(s) that represent the local neiborhood for the input DA region
#' @param ... parameters passed to Seurat FindMarkers() function
#'
#' @import Seurat
#'
#' @return a data.frame of markers and statistics
#'
#' @export
#'
SeuratLocalMarkers <- function(
  object, da.slot = "da", da.region.to.run,
  cell.label.slot, cell.label.to.run, ...
){
  seurat.version <- substr(packageVersion("Seurat"),1,1)
  if(seurat.version == "3"){
    object@meta.data$"label_da" <- "notused"
    object@meta.data$"label_da"[object@meta.data[,cell.label.slot] %in% cell.label.to.run] <- "local"
    object@meta.data$"label_da"[object@meta.data[,da.slot] == da.region.to.run] <- "da"
    Idents(object) <- object@meta.data$"label_da"
    output.markers <- FindMarkers(
      object, ident.1 = "da", ident.2 = "local", ...
    )
  }
  return(output.markers)
}

+2 −1
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#' @param labels.1 character vector, label name(s) that represent condition 1
#' @param labels.2 character vector, label name(s) that represent condition 2
#' @param k.vector vector, k values to create the score vector
#' @param save.knn a logical value to indicate whether to save computed kNN result, default False
#' @param alpha numeric, elasticnet mixing parameter passed to glmnet(), default 0 (Ridge)
#' @param k.folds integer, number of data splits used in the neural network, default 10
#' @param n.runs integer, number of times to run the neural network to get the predictions, default 10
@@ -27,7 +28,7 @@
#' @return a list of results
#' \describe{
#'   \item{da.ratio}{score vector for each cell}
#'   \item{da.pred}{(mean) prediction from the neural network}
#'   \item{da.pred}{(mean) prediction from the logistic regression}
#'   \item{da.up}{index for DA cells more abundant in condition of labels.2}
#'   \item{da.down}{index for DA cells more abundant in condition of labels.1}
#'   \item{pred.plot}{ggplot object showing the predictions of logistic regression on plot.embedding}
+9 −6
Original line number Diff line number Diff line
@@ -11,18 +11,19 @@ NULL
#' @param score numeric vector, a single value to color each cell, continuous
#' @param cell.col string vector, color bar to use for "score", defaul c("blue","white","red")
#' @param size numeric, dot size for each cell, default 0.5
#' @param alpha numeric between 0 to 1, dot opacity for each cell, default 1
#'
#' @return a ggplot object
#' @export
#'
plotCellScore <- function(X, score, cell.col = c("blue","white","red"), size = 0.5){
plotCellScore <- function(X, score, cell.col = c("blue","white","red"), size = 0.5, alpha = 1){
  # Add colnames for X
  colnames(X) <- c("Dim1","Dim2")

  # Plot cells with labels
  myggplot <- ggplot() + theme_cowplot() +
    geom_point(data = data.frame(Dim1 = X[,1], Dim2 = X[,2], Score = score),
               aes(x = Dim1, y = Dim2, col = Score), size = size) +
               aes(x = Dim1, y = Dim2, col = Score), size = size, alpha = alpha) +
    scale_color_gradientn(colours = cell.col)

  return(myggplot)
@@ -73,20 +74,22 @@ plotDAsite <- function(X, site.list, size = 0.5, cols = NULL){
#' @param label vector, label for each cell
#' @param cell.col string vector, color bar to use for cell labels, default ggplot default
#' @param size numeric, dot size for each cell, default 0.5
#' @param alpha numeric between 0 to 1, dot opacity for each cell, default 1
#' @param do.label a logical value indicating whether to add text to mark each cell label
#' @param label.size numeric, size of text labels, default 4
#'
#' @return a ggplot object
#' @export
#'
plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, do.label = T){
plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, alpha = 1, do.label = T, label.size = 4){
  # Add colnames for X
  colnames(X) <- c("Dim1","Dim2")

  # Plot cells with labels
  myggplot <- ggplot() + theme_cowplot() +
    geom_point(data = data.frame(Dim1 = X[,1], Dim2 = X[,2], Group = label, stringsAsFactors = F),
               aes(x = Dim1, y = Dim2, col = Group), size = size) +
    guides(colour = guide_legend(override.aes = list(size=3), title = NULL))
               aes(x = Dim1, y = Dim2, col = Group), size = size, alpha = alpha) +
    guides(colour = guide_legend(override.aes = list(size=3,alpha=1), title = NULL))

  # Change cell color
  if(!is.null(cell.col)){
@@ -100,7 +103,7 @@ plotCellLabel <- function(X, label, cell.col = NULL, size = 0.5, do.label = T){
    labeldim2 <- by(X[,2], INDICES = label, FUN = median)[mylabels]

    myggplot <- myggplot +
      annotate("text", x = labeldim1, y = labeldim2, label = as.character(mylabels))
      annotate("text", x = labeldim1, y = labeldim2, label = as.character(mylabels), size = label.size)
  }

  return(myggplot)
Loading