Commit 3a367fc4 authored by Jun Zhao's avatar Jun Zhao
Browse files

add documentation

parent 9b22e371
Loading
Loading
Loading
Loading
+40 −3
Original line number Original line Diff line number Diff line
@@ -3,20 +3,39 @@ library(ggplot2)
library(cowplot)
library(cowplot)
library(RColorBrewer)
library(RColorBrewer)
library(scales)
library(scales)
library(diffusionMap)
library(tclust)
library(tclust)






## Step1: select cells with DA neighborhood
## Step1: select cells with DA neighborhood


#' @param X size N-by-p matrix, input merged dataset of interest after dimension reduction
#' @param cell.labels size N vector, labels for each input cell
#' @param labels.1 vector, label name(s) that represent condition 1
#' @param labels.2 vector, label name(s) that represent condition 2
#' @param k.vector vector, k values to create the score vector
#' @param ratio maximum ratio of cells to keep, default 0.2
#' @param i.max maximum number of iterations to run in the iterative clustering of the score vector, default 10
#' @param do.diffuse a logical value to indicate whether to calculate diffusion coordinates for X, default False
#' @param neigen number of diffusion coordinates, default 20
#' @param do.plot a logical value to indicate whether to return ggplot objects showing the results, default True
#' @param plot.embedding size N-by-2 matrix, 2D embedding for the cells
#' @param size cell size to use in the plot, default 0.5
#' 
#' @return a list of results
#'         da.ratio: score vector for each cell
#'         da.cell.idx: indices of the cells with the most DA neighborhoods
#'         da.plot: ggplot object showing the steps of iterative clustering result on plot.embedding
#'         da.cells.plot: ggplot object highlighting cells of da.cell.idx on plot.embedding

getDAcells <- function(
getDAcells <- function(
  X, cell.labels, labels.1, labels.2, k.vector,
  X, cell.labels, labels.1, labels.2, k.vector,
  ratio = 0.2, i.max = 10, 
  ratio = 0.2, i.max = 10, 
  do.diffuse = T, neigen = 20, do.plot = T, plot.embedding = NULL, size = 0.5
  do.diffuse = F, neigen = 20, do.plot = T, plot.embedding = NULL, size = 0.5
){
){
  # get diffusion coordinates
  # get diffusion coordinates
  if(do.diffuse){
  if(do.diffuse){
    library(diffusionMap)
    cat("Calculating diffusion coordinates.\n")
    cat("Calculating diffusion coordinates.\n")
    X.input <- X
    X.input <- X
    X <- diffuse(D = dist(X.input), neigen = neigen)$X
    X <- diffuse(D = dist(X.input), neigen = neigen)$X
@@ -73,6 +92,23 @@ getDAcells <- function(


## Step2: get DA regions from DA cells in Step1
## Step2: get DA regions from DA cells in Step1


#' @param X size N-by-p matrix, input merged dataset of interest after dimension reduction
#' @param cell.idx result "da.cell.idx" from the output of function getDAcells
#' @param k number of DA regions to find for cells from function getDAcells
#' @param alpha estimated ratio of outliers of cells from function getDAcells
#' @param restr.fact parameter inherited from function "tclust"
#' @param cell.labels size N vector, labels for each input cell
#' @param labels.1 vector, label name(s) that represent condition 1
#' @param labels.2 vector, label name(s) that represent condition 2
#' @param do.plot a logical value to indicate whether to return ggplot objects showing the results, default True
#' @param plot.embedding size N-by-2 matrix, 2D embedding for the cells
#' @param size cell size to use in the plot, default 0.5
#' 
#' @return a list of results
#'         cluster.res: DA region number for each cell from cell.idx, '0' represents outlier cells
#'         DA.stat: a table showing DA score and p-value for each DA region
#'         da.region.plot: ggplot object showing DA regions (cells in gray are outliers) on plot.embedding

getDAregion <- function(
getDAregion <- function(
  X, cell.idx, k, alpha, restr.fact = 50,
  X, cell.idx, k, alpha, restr.fact = 50,
  cell.labels, labels.1, labels.2, 
  cell.labels, labels.1, labels.2, 
@@ -110,7 +146,8 @@ getDAregion <- function(






## Useful functions
##=======================================================##
## Other functions


# calculate knn.diff.ratio for each cell
# calculate knn.diff.ratio for each cell
daPerCell <- function(
daPerCell <- function(
+39 −1
Original line number Original line Diff line number Diff line
# DA-seq
# DA-seq (Detecting regions of differential abundance between scRNA-seq datasets)
 No newline at end of file

## Introduction
DA-seq is a method to detect cell subpopulations with differential abundance (DA) between single cell RNA-seq (scRNA-seq) datasets from different samples. Given a low dimensional transformation, for example principal component analysis (PCA), of the merged gene expression matrices, DA-seq first computes a score vector for each cell that represents the DA behavior in its neighborhood and uses it to select cells in the most DA areas; it then groups these cells into distinct DA regions.

This repository contains code for running DA-seq in R.


## Dependencies
Required packages: RANN, tclust, ggplot2, cowplot, RColorBrewer, scales

Suggested package: diffusionMap


## Usage
DA-seq can be used as follows:

Let X be an N-by-p matrix of the PCA embeddings of merged scRNA-seq datasets A and B; X.label be a vector of length N specifying the origin of each cell ('A' or 'B'); X.2d be the 2D embedding of the cells.

~~~~
X.da.cells <- getDAcells(
  X = X, 
  cell.labels = X.label, 
  labels.1 = "A", 
  labels.2 = "B", 
  k.vector = seq(50,500,10), 
  plot.embedding = X.2d
)

X.da.regions <- getDAregion(
  X = X, 
  cell.idx = X.da.cells$da.cell.idx, 
  k = 4, alpha = 0.05, 
  cell.labels = X.label, 
  labels.1 = "A", 
  labels.2 = "B", 
  plot.embedding = X.2d
)
~~~~