update default (d1e6813b) · Commits · github_fork / DAseq2

R/getDAcells.R

+40 −30

Original line number	Diff line number	Diff line
		@@ -14,9 +14,9 @@
		#' @param alpha numeric, elasticnet mixing parameter passed to glmnet(), default 0 (Ridge)
		#' @param k.folds integer, number of data splits used in the neural network, default 10
		#' @param n.runs integer, number of times to run the neural network to get the predictions, default 5
		#' @param n.rand integer, number of random permutations to run, default 5
		#' @param n.rand integer, number of random permutations to run, default 2
		#' @param pred.thres length-2 vector, top and bottom threshold on DA measure,
		#' default (-0.8,0.8), select significant DA cells with at least 9:1 abundance difference
		#' default NULL, select significant DA cells based on permutation
		#' @param do.plot a logical value to indicate whether to return ggplot objects showing the results,
		#' default True
		#' @param plot.embedding size N-by-2 matrix, 2D embedding for the cells
		@@ -40,7 +40,7 @@

		getDAcells <- function(
		X, cell.labels, labels.1, labels.2, k.vector = NULL, save.knn = F,
		alpha = 0, k.folds = 10, n.runs = 5, n.rand = 2, pred.thres = c(-0.8,0.8),
		alpha = 0, k.folds = 10, n.runs = 5, n.rand = 2, pred.thres = NULL,
		do.plot = T, plot.embedding = NULL, size = 0.5
		){
		if(!inherits(x = X, what = "matrix")){
		@@ -133,6 +133,10 @@ getDAcells <- function(
		X.random.pred <- unlist(X.random.pred)

		# select DA cells
		if(is.null(pred.thres)){
		cat("Setting thresholds based on permutation\n")
		pred.thres <- c(min(X.random.pred),max(X.random.pred))
		} else {
		pred.thres <- sort(pred.thres, decreasing = F)
		if(max(X.random.pred) > pred.thres[2]){
		warning("User input top threshold not within significance range, updating top threshold to ", format(max(X.random.pred), digits = 3))
		@@ -142,6 +146,7 @@ getDAcells <- function(
		warning("User input bottom threshold not within significance range, updating bottom threshold to ", format(min(X.random.pred), digits = 3))
		pred.thres[1] <- min(X.random.pred)
		}
		}
		X.da.up <- which(X.pred > pred.thres[2])
		X.da.down <- which(X.pred < pred.thres[1])

		@@ -218,7 +223,7 @@ getDAcells <- function(
		#'
		#' @param X output from getDAcells()
		#' @param pred.thres length-2 vector, top and bottom threshold on DA measure,
		#' default (-0.8,0.8), select significant DA cells with at least 9:1 abundance difference
		#' default NULL, select significant DA cells based on permutation
		#' @param force.thres a logical value to indicate whether to forcefully use pred.thres without considering significance, default False
		#' @param alpha set this parameter to not NULL to rerun Logistic regression
		#' @param do.plot a logical value to indicate whether to return ggplot objects showing the results,
		@@ -231,7 +236,7 @@ getDAcells <- function(
		#' @export

		updateDAcells <- function(
		X, pred.thres = c(-0.8,0.8), force.thres = F,
		X, pred.thres = NULL, force.thres = F,
		alpha = NULL, k.folds = 10, n.runs = 10,
		cell.labels = NULL, labels.1 = NULL, labels.2 = NULL,
		do.plot = T, plot.embedding = NULL, size = 0.5
		@@ -267,6 +272,10 @@ updateDAcells <- function(

		# select DA cells
		X.random.pred <- unlist(X$rand.pred)
		if(is.null(pred.thres)){
		cat("Setting thresholds based on permutation\n")
		pred.thres <- c(min(X.random.pred),max(X.random.pred))
		} else {
		pred.thres <- sort(pred.thres, decreasing = F)
		if(force.thres){
		if(max(X.random.pred) > pred.thres[2]){
		@@ -287,6 +296,7 @@ updateDAcells <- function(
		pred.thres[1] <- min(X.random.pred)
		}
		}
		}
		X.da.up <- which(X.pred > pred.thres[2])
		X.da.down <- which(X.pred < pred.thres[1])

R/getDAregion.R

+7 −2

Original line number	Diff line number	Diff line
		@@ -11,7 +11,7 @@
		#' @param prune.SNN parameter for Seurat function FindNeighbors(), default 1/15
		#' @param resolution parameter for Seurat function FindClusters(), default 0.05
		#' @param group.singletons parameter for Seurat function FindClusters(), default True
		#' @param min.cell integer, number of cells below which a DA region will be removed as outliers, default 15
		#' @param min.cell integer, number of cells below which a DA region will be removed as outliers, default NULL, use minimum k value in k-vector
		#' @param do.plot a logical value to indicate whether to return ggplot objects showing the results, default True
		#' @param plot.embedding size N-by-2 matrix, 2D embedding for the cells
		#' @param size cell size to use in the plot, default 0.5
		@@ -35,7 +35,7 @@
		getDAregion <- function(
		X, da.cells,
		cell.labels, labels.1, labels.2,
		prune.SNN = 1/15, resolution = 0.05, group.singletons = F, min.cell = 15,
		prune.SNN = 1/15, resolution = 0.05, group.singletons = F, min.cell = NULL,
		do.plot = T, plot.embedding = NULL, size = 0.5, do.label = F,
		...
		){
		@@ -56,6 +56,11 @@ getDAregion <- function(
		if(length(setdiff(cell.labels, c(labels.1, labels.2))) > 0){
		stop("Input parameter cell.labels contain labels not from labels.1 or labels.2")
		}
		if(is.null(min.cell)){
		min.cell <- as.integer(colnames(da.cells$da.ratio)[1])
		cat("Using min.cell = ", min.cell, "\n", sep = "")
		}

		seurat.version <- substr(packageVersion("Seurat"),1,1)
		if(seurat.version == "3"){
		X.S <- CreateSeuratObject(counts = t(X))

docs/articles/tutorial.html

+31 −27

Original line number	Diff line number	Diff line
		@@ -154,7 +154,7 @@
		<li>
		<code>da.ratio</code>, score vector for each cell</li>
		<li>
		<code>da.pred</code>, prediction values from logistic classifier</li>
		<code>da.pred</code>, DA measure for each cell</li>
		<li>
		<code>da.up</code>, index of DA cells more abundant in <em>labels.2</em> (non-responders)</li>
		<li>
		@@ -166,18 +166,21 @@
		## ..- attr(*, "dimnames")=List of 2
		## .. ..$ : NULL
		## .. ..$ : chr [1:10] "50" "100" "150" "200" ...
		## $ da.pred : num [1:16291] 0.386 0.792 0.679 0.633 0.352 ...</code></pre>
		## $ da.pred : num [1:16291] -0.5088 0.3308 0.0449 -0.0564 -0.5612 ...</code></pre>
		<p>The prediction values are overlayed on the 2D embedding in the <code>pred.plot</code> slot of the output.</p>
		<div class="sourceCode" id="cb10"><html><body><pre class="r"><span class="no">da_cells</span>$<span class="no">pred.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-7-1.png" width="480"></p>
		<p>By default, cells with prediction values in the top and bottom <strong>5%</strong> quantile are selected as DA cells, this can be changed by running <code><a href="../reference/updateDAcells.html">updateDAcells()</a></code>. In this data, the number of cells from non-responder samples are about twice as responder samples. Here we change it to top <strong>10%</strong> (for non-responder) and bottom <strong>5%</strong> (for responder) quantile.</p>
		<div class="sourceCode" id="cb11"><html><body><pre class="r"><span class="no">da_cells</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/updateDAcells.html">updateDAcells</a></span>(
		<span class="kw">X</span> <span class="kw">=</span> <span class="no">da_cells</span>, <span class="kw">pred.thres</span> <span class="kw">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html">c</a></span>(<span class="fl">0.05</span>,<span class="fl">0.9</span>),
		<p>DAseq runs a random permutation on the labels to generate the null distribution for DA measure. We can check the permutation results in the <code>rand.plot</code> slot.</p>
		<div class="sourceCode" id="cb11"><html><body><pre class="r"><span class="no">da_cells</span>$<span class="no">rand.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-8-1.png" width="432"></p>
		<p>By default, cells with DA measure values larger than the maximum or small than the minumum than permutated DA measure are selected as DA cells. The threshold can be changed by running <code><a href="../reference/updateDAcells.html">updateDAcells()</a></code> to look at the most salient DA cells. In this data, we use <strong>0.8</strong> (for non-responder) and <strong>-0.8</strong> (for responder).</p>
		<div class="sourceCode" id="cb12"><html><body><pre class="r"><span class="no">da_cells</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/updateDAcells.html">updateDAcells</a></span>(
		<span class="kw">X</span> <span class="kw">=</span> <span class="no">da_cells</span>, <span class="kw">pred.thres</span> <span class="kw">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html">c</a></span>(-<span class="fl">0.8</span>,<span class="fl">0.8</span>),
		<span class="kw">plot.embedding</span> <span class="kw">=</span> <span class="no">X.2d.melanoma</span>
		)</pre></body></html></div>
		<p>Selected DA cells are highlighted in the 2D embedding in the <code>da.cells.plot</code> slot. Cells in red are in the top quantile (these cells are more abundant in non-responder samples), blue in the bottom (these cells are more abundant in responder samples).</p>
		<div class="sourceCode" id="cb12"><html><body><pre class="r"><span class="no">da_cells</span>$<span class="no">da.cells.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-9-1.png" width="432"></p>
		<p>Selected DA cells are highlighted in the 2D embedding in the <code>da.cells.plot</code> slot. Cells in red are in the top (these cells are more abundant in non-responder samples), blue in the bottom (these cells are more abundant in responder samples).</p>
		<div class="sourceCode" id="cb13"><html><body><pre class="r"><span class="no">da_cells</span>$<span class="no">da.cells.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-10-1.png" width="432"></p>
		</div>
		<div id="get-da-regions" class="section level3">
		<h3 class="hasAnchor">
		@@ -191,9 +194,10 @@
		<li>
		<code>resolution</code>: clustering parameter, default 0.05, use a larger value to group DA cells into more regions</li>
		<li>
		<code>min.cell</code>: minimum number of cells, regions with fewer cells will be removed, default 15</li>
		<code>min.cell</code>: minimum number of cells, regions with fewer cells will be removed, default is to use the minumum <em>k</em> value in the <code>k.vector</code>
		</li>
		</ul>
		<div class="sourceCode" id="cb13"><html><body><pre class="r"><span class="no">da_regions</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/getDAregion.html">getDAregion</a></span>(
		<div class="sourceCode" id="cb14"><html><body><pre class="r"><span class="no">da_regions</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/getDAregion.html">getDAregion</a></span>(
		<span class="kw">X</span> <span class="kw">=</span> <span class="no">X.melanoma</span>,
		<span class="kw">da.cells</span> <span class="kw">=</span> <span class="no">da_cells</span>,
		<span class="kw">cell.labels</span> <span class="kw">=</span> <span class="no">X.label.melanoma</span>,
		@@ -209,23 +213,23 @@
		<li>
		<code>DA.stat</code>, statistics of each DA subpopulation</li>
		</ul>
		<div class="sourceCode" id="cb14"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/str.html">str</a></span>(<span class="no">da_regions</span>[<span class="fl">1</span>:<span class="fl">2</span>])</pre></body></html></div>
		<div class="sourceCode" id="cb15"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/str.html">str</a></span>(<span class="no">da_regions</span>[<span class="fl">1</span>:<span class="fl">2</span>])</pre></body></html></div>
		<pre><code>## List of 2
		## $ da.region.label: num [1:16291] 0 0 0 0 0 0 0 0 0 0 ...
		## $ DA.stat : num [1:5, 1:3] 0.973 0.97 -0.952 -0.981 -0.939 ...
		## $ DA.stat : num [1:5, 1:3] 0.973 0.965 -0.933 -0.976 -0.904 ...
		## ..- attr(*, "dimnames")=List of 2
		## .. ..$ : NULL
		## .. ..$ : chr [1:3] "DA.score" "pval.wilcoxon" "pval.ttest"</code></pre>
		<p>Clustering result is shown in the plot: <code>da.region.plot</code> slot.</p>
		<div class="sourceCode" id="cb16"><html><body><pre class="r"><span class="no">da_regions</span>$<span class="no">da.region.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-12-1.png" width="480"></p>
		<div class="sourceCode" id="cb17"><html><body><pre class="r"><span class="no">da_regions</span>$<span class="no">da.region.plot</span></pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-13-1.png" width="480"></p>
		</div>
		<div id="get-markers-for-each-da-subpopulation-with-stg" class="section level3">
		<h3 class="hasAnchor">
		<a href="#get-markers-for-each-da-subpopulation-with-stg" class="anchor"></a>Get markers for each DA subpopulation with STG</h3>
		<p>The final step of DAseq is to characterize each DA subpopulation by detecting genes that seprate the DA subpopulation from the rest of the cells through STG (stochastic gates).</p>
		<p>The gene expression data is NOT included in the DAseq package and needs to be downloaded. This code will download the data to the current working directory, and load this data into R.</p>
		<div class="sourceCode" id="cb17"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/download.file.html">download.file</a></span>(
		<div class="sourceCode" id="cb18"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/download.file.html">download.file</a></span>(
		<span class="st">"ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE120nnn/GSE120575/suppl/GSE120575_Sade_Feldman_melanoma_single_cells_TPM_GEO.txt.gz"</span>,
		<span class="st">"./GSE120575_Sade_Feldman_melanoma_single_cells_TPM_GEO.txt.gz"</span>
		)
		@@ -235,7 +239,7 @@
		)
		<span class="no">X.data.melanoma</span> <span class="kw"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/matrix.html">as.matrix</a></span>(<span class="no">X.data.melanoma</span>[,-<span class="fl">16292</span>])</pre></body></html></div>
		<p>Then, we will use STG to identify markers for each DA subpopulation. The input parameters for STG are the normalized gene expression matrix, and the output from the function <code><a href="../reference/getDAregion.html">getDAregion()</a></code>.</p>
		<div class="sourceCode" id="cb18"><html><body><pre class="r"><span class="no">STG_markers</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/STGmarkerFinder.html">STGmarkerFinder</a></span>(
		<div class="sourceCode" id="cb19"><html><body><pre class="r"><span class="no">STG_markers</span> <span class="kw"><-</span> <span class="fu"><a href="../reference/STGmarkerFinder.html">STGmarkerFinder</a></span>(
		<span class="kw">X</span> <span class="kw">=</span> <span class="no">X.data.melanoma</span>,
		<span class="kw">da.regions</span> <span class="kw">=</span> <span class="no">da_regions</span>,
		<span class="kw">lambda</span> <span class="kw">=</span> <span class="fl">1.5</span>, <span class="kw">n.runs</span> <span class="kw">=</span> <span class="fl">5</span>, <span class="kw">return.model</span> <span class="kw">=</span> <span class="no">T</span>,
		@@ -251,21 +255,21 @@
		<code>model</code>, the actual model from STG for each DA subpopulation, please refer to the documentation of <code><a href="../reference/STGmarkerFinder.html">STGmarkerFinder()</a></code>
		</li>
		</ul>
		<p>Top markers for DA region 2 are shown:</p>
		<div class="sourceCode" id="cb19"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/head.html">head</a></span>(<span class="no">STG_markers</span>$<span class="no">da.markers</span><span class="kw">[[</span><span class="st">"1"</span>]])</pre></body></html></div>
		<p>Top markers for DA region 5 are shown:</p>
		<div class="sourceCode" id="cb20"><html><body><pre class="r"><span class="fu"><a href="https://rdrr.io/r/utils/head.html">head</a></span>(<span class="no">STG_markers</span>$<span class="no">da.markers</span><span class="kw">[[</span><span class="st">"5"</span>]])</pre></body></html></div>
		<pre><code>## gene avg_logFC p_value
		## VCAM1 VCAM1 2.378151 0.000000e+00
		## PRF1 PRF1 1.291038 1.192611e-257
		## HAVCR2 HAVCR2 1.513924 1.702221e-247
		## TNFRSF9 TNFRSF9 1.795322 1.622910e-234
		## NKG7 NKG7 1.071042 7.919710e-227
		## CD38 CD38 1.486187 9.654928e-218</code></pre>
		## TCF7 TCF7 1.8865021 4.261047e-122
		## RPL13 RPL13 0.1765772 2.421585e-113
		## RPL3 RPL3 0.2037486 1.516596e-102
		## LEF1 LEF1 2.3623672 6.638429e-102
		## RPS6 RPS6 0.2002730 2.013261e-100
		## RPS3A RPS3A 0.2040410 2.452806e-98</code></pre>
		<p>In the <code>model</code> slot, predictions from STG are available. These predictions are linear combinations of a limited set of genes that best separate the DA subpopulation from the rest of the cells.</p>
		<p>For example, here we will plot the prediction value from STG for DA subpopulation 5: the <code>pred</code> slot of the 5th list in the <code>model</code> slot of the output.</p>
		<div class="sourceCode" id="cb21"><html><body><pre class="r"><span class="fu"><a href="../reference/plotCellScore.html">plotCellScore</a></span>(
		<div class="sourceCode" id="cb22"><html><body><pre class="r"><span class="fu"><a href="../reference/plotCellScore.html">plotCellScore</a></span>(
		<span class="kw">X</span> <span class="kw">=</span> <span class="no">X.2d.melanoma</span>, <span class="kw">score</span> <span class="kw">=</span> <span class="no">STG_markers</span>$<span class="no">model</span><span class="kw">[[</span><span class="st">"5"</span>]]$<span class="no">pred</span>
		)</pre></body></html></div>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-16-1.png" width="480"></p>
		<p><img src="tutorial_files/figure-html/unnamed-chunk-17-1.png" width="480"></p>
		</div>
		</div>
		</div>

docs/articles/tutorial_files/figure-html/unnamed-chunk-10-1.png

0 → 100644

+38.4 KiB

Loading image diff...

docs/articles/tutorial_files/figure-html/unnamed-chunk-13-1.png

0 → 100644

+69.8 KiB

Loading image diff...

Admin message