First steps towards Bioc conformity. (0ed40a7e) · Commits · github_fork / DTUrtle

R/dge_analysis.R

+30 −26

Original line number	Diff line number	Diff line
		@@ -41,12 +41,12 @@ import_dge_counts <- function(files, type, ...){
		}

		if(methods::hasArg("txOut")){
		if(args$txOut==T){
		if(args$txOut==TRUE){
		warning("Unless you exactly know what you are doing, it is not recommended to set txOut to TRUE\nDownstream analysis may fail!")
		}
		}else{
		assertthat::assert_that(methods::hasArg("tx2gene"), msg = "Please provide a 'tx2gene' data frame for gene-level summarisation.")
		args$txOut <- F
		args$txOut <- FALSE
		}

		if(type=="alevin"\|\|type=="bustools"){
		@@ -120,9 +120,9 @@ import_dge_counts <- function(files, type, ...){
		#' @seealso [import_dge_counts()] for correct import of gene-level counts. [combine_to_matrix()] to summarize scRNA counts to one matrix.
		run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_threshold=0, sig_threshold=0.01,
		dge_calling_strategy="bulk", subset_feature=NULL, subset_sample=NULL, deseq_opts=list(),
		lfc_shrink_opts=list(), return_dds=F, BPPARAM=BiocParallel::SerialParam()){
		lfc_shrink_opts=list(), return_dds=FALSE, BPPARAM=BiocParallel::SerialParam()){
		if(methods::is(counts, "Seurat")){
		assertthat::assert_that(requireNamespace("Seurat", quietly = T), msg = "The package Seurat is needed if a Seurat object is provided.")
		assertthat::assert_that(requireNamespace("Seurat", quietly = TRUE), msg = "The package Seurat is needed if a Seurat object is provided.")
		assertthat::assert_that(utils::packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(counts@version>="3.0.0", msg = "The provided 'counts' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		counts <- Seurat::GetAssayData(counts)
		@@ -155,17 +155,21 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_

		if(is.null(id_col)){
		assertthat::assert_that(all(rownames(pd) %in% colnames(counts)), msg = "Provided id_col does not match with sample names in counts.")
		samp <- data.frame("sample_id"=rownames(pd), "condition"=pd[[cond_col]], pd[,-c(which(colnames(pd)==cond_col)),drop=F], row.names = NULL, stringsAsFactors = F)
		samp <- data.frame("sample_id"=rownames(pd), "condition"=pd[[cond_col]],
		pd[,-c(which(colnames(pd)==cond_col)),drop=FALSE],
		row.names = NULL, stringsAsFactors = FALSE)
		}else{
		samp <- data.frame("sample_id"=pd[[id_col]], "condition"=pd[[cond_col]], pd[,-c(which(colnames(pd) %in% c(id_col, cond_col))),drop=F], row.names = NULL, stringsAsFactors = F)
		samp <- data.frame("sample_id"=pd[[id_col]], "condition"=pd[[cond_col]],
		pd[,-c(which(colnames(pd) %in% c(id_col, cond_col))),drop=FALSE],
		row.names = NULL, stringsAsFactors = FALSE)
		}

		if(!is.null(subset_feature)\|!is.null(subset_sample)){
		if(is.null(subset_feature)){
		subset_feature <- T
		subset_feature <- TRUE
		}
		if(is.null(subset_sample)){
		subset_sample <- T
		subset_sample <- TRUE
		}
		if(is.character(subset_feature)){
		assertthat::assert_that(all(subset_feature %in% rownames(counts)), msg="Invalid 'subset_feature' names provided.")
		@@ -173,8 +177,8 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_
		if(is.character(subset_sample)){
		assertthat::assert_that(all(subset_sample %in% colnames(counts)), msg="Invalid 'subset_sample' names provided.")
		}
		counts <- counts[subset_feature, subset_sample, drop=F]
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=F]
		counts <- counts[subset_feature, subset_sample, drop=FALSE]
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=FALSE]
		}

		if(is.null(cond_levels)){
		@@ -184,22 +188,22 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_
		message("\nComparing in '", cond_col, "': '", cond_levels[1], "' vs '", cond_levels[2], "'")

		samp$condition <- factor(samp$condition, levels=cond_levels)
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=F]
		counts <- counts[, samp$sample_id, drop=F]
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=FALSE]
		counts <- counts[, samp$sample_id, drop=FALSE]

		#exclude samples not in comparison
		exclude <- as.vector(samp$sample_id[is.na(samp$condition)])
		if(length(exclude)!=0){
		message("Excluding ", ifelse(length(exclude)<10, paste(exclude, collapse=" "), paste(length(exclude), "cells/samples")), " for this comparison!")
		samp <- samp[!is.na(samp$condition),]
		counts <- counts[, !(colnames(counts) %in% exclude), drop=F]
		counts <- counts[, !(colnames(counts) %in% exclude), drop=FALSE]
		}
		message("\nProceed with cells/samples: ",paste0(utils::capture.output(table(samp$condition)), collapse = "\n"))
		assertthat::assert_that(length(levels(samp$condition))==2, msg="No two sample groups left for comparison. Aborting!")


		if(exists("eff_len")){
		eff_len <- eff_len[rownames(counts), colnames(counts), drop=F]
		eff_len <- eff_len[rownames(counts), colnames(counts), drop=FALSE]
		dds <- DESeq2::DESeqDataSetFromTximport(txi = list("counts"=counts, "length"=eff_len, "countsFromAbundance"=cfa),
		colData = samp, design = ~condition)
		}else{
		@@ -228,7 +232,7 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_
		It is advised to update the DESeq2 package to benefit from this development. Falling back to the default fitType for now.")
		use_deseq_opts$fitType <- NULL
		}
		if(!requireNamespace("glmGamPoi", quietly = T)){
		if(!requireNamespace("glmGamPoi", quietly = TRUE)){
		warning("Installation of the bioconductor package 'glmGamPoi' would offer a much faster and more precise GLM estimation method (glmGamPoi).
		It is advised to install the package to benefit from this development. Falling back to the default fitType for now.")
		use_deseq_opts$fitType <- NULL
		@@ -246,7 +250,7 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_
		"parallel"=TRUE,
		"BPPARAM"=BPPARAM)

		if(!requireNamespace("apeglm", quietly = T)){
		if(!requireNamespace("apeglm", quietly = TRUE)){
		warning("Installation of the bioconductor package 'apeglm' would offer a better performing shrinkage estimator.
		It is advised to install the package to benefit from this development. Falling back to the 'normal' estimator.")
		message("Using provided svalue threshold to filter as pvalue threshold.")
		@@ -263,11 +267,11 @@ run_deseq2 <- function(counts, pd, id_col=NULL, cond_col, cond_levels=NULL, lfc_
		threshold_col <- "padj"
		}

		res_all <- as.data.frame(res[!is.na(res[[threshold_col]]),,drop=F])
		res_all <- as.data.frame(res[!is.na(res[[threshold_col]]),,drop=FALSE])
		res_all$gene <- rownames(res_all)
		res_all <- move_columns_to_front(res_all, "gene")
		res_sig <- res_all[res_all[[threshold_col]]<sig_threshold,,drop=F]
		res_sig <- res_sig[order(abs(res_sig$log2FoldChange), decreasing = T),,drop=F]
		res_sig <- res_all[res_all[[threshold_col]]<sig_threshold,,drop=FALSE]
		res_sig <- res_sig[order(abs(res_sig$log2FoldChange), decreasing = TRUE),,drop=FALSE]

		message("Found " ,nrow(res_sig), " significant DEGs.")
		message("\t\tOver-expressed: ", sum(res_sig$log2FoldChange>0))

R/dtu_analysis.R

+39 −35

Original line number	Diff line number	Diff line
		@@ -75,11 +75,11 @@ import_counts <- function(files, type, ...){
		}
		}
		if(methods::hasArg("txOut")){
		if(args$txOut==F){
		if(args$txOut==FALSE){
		warning("Unless you exactly know what you are doing it is not recommended to set txOut to FALSE.\nDownstream analysis may fail!")
		}
		}else{
		args$txOut <- T
		args$txOut <- TRUE
		}
		args$files <- files
		args$type <- type
		@@ -110,7 +110,7 @@ import_counts <- function(files, type, ...){
		combine_to_matrix <- function(tx_list, cell_extensions=NULL, cell_extension_side="append", seurat_obj=NULL, tx2gene=NULL, assay_name="dtutx"){

		if(!is.null(seurat_obj)){
		assertthat::assert_that(requireNamespace("Seurat", quietly = T), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(requireNamespace("Seurat", quietly = TRUE), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(utils::packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(methods::is(seurat_obj, "Seurat"), msg = "The provided 'seurat_obj' is not of class Seurat.")
		assertthat::assert_that(seurat_obj@version>="3.0.0", msg = "The provided 'seurat_obj' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		@@ -207,13 +207,13 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, cell_extension_side
		message("Subsetting!")
		}
		seurat_obj <- subset(seurat_obj, cells=common_cells)
		tx_list <- tx_list[,common_cells,drop=F]
		tx_list <- tx_list[,common_cells,drop=FALSE]
		}

		#exclude not expressed
		excl_tx <- Matrix::rowSums(tx_list)>0
		message("Excluding ", sum(!excl_tx), " overall not expressed features.")
		tx_list <- tx_list[excl_tx,,drop=F]
		tx_list <- tx_list[excl_tx,,drop=FALSE]
		message(nrow(tx_list), " features left.")

		if(!is.null(seurat_obj)){
		@@ -274,9 +274,9 @@ combine_to_matrix <- function(tx_list, cell_extensions=NULL, cell_extension_side
		#'
		#' @family DTUrtle DTU
		#' @export
		run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=NULL, filtering_strategy="bulk", add_pseudocount=F, BPPARAM=BiocParallel::SerialParam(), force_dense=T, subset_feature=NULL, subset_sample=NULL, carry_over_metadata=T, filter_only=F, ...){
		run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=NULL, filtering_strategy="bulk", add_pseudocount=FALSE, BPPARAM=BiocParallel::SerialParam(), force_dense=TRUE, subset_feature=NULL, subset_sample=NULL, carry_over_metadata=TRUE, filter_only=FALSE, ...){
		if(methods::is(counts, "Seurat")){
		assertthat::assert_that(requireNamespace("Seurat", quietly = T), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(requireNamespace("Seurat", quietly = TRUE), msg = "The package Seurat is needed for adding the combined matrix to a seurat object.")
		assertthat::assert_that(utils::packageVersion("Seurat")>="3.0.0", msg = "At least Version 3 of Seurat is needed. Currently only Seurat 3 objects are supported.")
		assertthat::assert_that(counts@version>="3.0.0", msg = "The provided 'counts' is not a Seurat 3 object. Currently only Seurat 3 objects are supported.")
		if(is.vector(tx2gene)){
		@@ -308,10 +308,10 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=

		if(!is.null(subset_feature)\|!is.null(subset_sample)){
		if(is.null(subset_feature)){
		subset_feature <- T
		subset_feature <- TRUE
		}
		if(is.null(subset_sample)){
		subset_sample <- T
		subset_sample <- TRUE
		}
		if(is.character(subset_feature)){
		assertthat::assert_that(all(subset_feature %in% rownames(counts)), msg="Invalid 'subset_feature' names provided.")
		@@ -319,7 +319,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		if(is.character(subset_sample)){
		assertthat::assert_that(all(subset_sample %in% colnames(counts)), msg="Invalid 'subset_sample' names provided.")
		}
		counts <- counts[subset_feature, subset_sample, drop=F]
		counts <- counts[subset_feature, subset_sample, drop=FALSE]
		}

		assertthat::assert_that(all(rownames(counts) %in% tx2gene[[1]]), msg = "Not all rownames of the counts are present in the first tx2gene column. You may want to reorder the tx2gene columns with 'move_columns_to_front()' or use 'subset_feature' to subset the counts.")
		@@ -329,7 +329,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		assertthat::assert_that(nrow(tx2gene)==nrow(counts))
		message("Using tx2gene columns:\n\t",colnames(tx2gene)[[1]]," ---> 'feature_id'\n\t",colnames(tx2gene)[[2]]," ---> 'gene_id'")
		colnames(tx2gene)[c(1,2)] <- c("feature_id", "gene_id")
		colnames(tx2gene) <- make.names(colnames(tx2gene), unique = T)
		colnames(tx2gene) <- make.names(colnames(tx2gene), unique = TRUE)

		if(is.null(cond_levels)){
		cond_levels <- unique(pd[[cond_col]])
		@@ -339,20 +339,24 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=

		if(is.null(id_col)){
		assertthat::assert_that(all(rownames(pd) %in% colnames(counts)), msg = "Provided id_col does not match with sample names in counts.")
		samp <- data.frame("sample_id"=rownames(pd), "condition"=pd[[cond_col]], pd[,-c(which(colnames(pd)==cond_col)),drop=F], row.names = NULL, stringsAsFactors = F)
		samp <- data.frame("sample_id"=rownames(pd), "condition"=pd[[cond_col]],
		pd[,-c(which(colnames(pd)==cond_col)),drop=FALSE],
		row.names = NULL, stringsAsFactors = FALSE)
		}else{
		samp <- data.frame("sample_id"=pd[[id_col]], "condition"=pd[[cond_col]], pd[,-c(which(colnames(pd) %in% c(id_col, cond_col))),drop=F], row.names = NULL, stringsAsFactors = F)
		samp <- data.frame("sample_id"=pd[[id_col]], "condition"=pd[[cond_col]],
		pd[,-c(which(colnames(pd) %in% c(id_col, cond_col))),drop=FALSE],
		row.names = NULL, stringsAsFactors = FALSE)
		}
		samp$condition <- factor(samp$condition, levels=cond_levels)
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=F]
		counts <- counts[, samp$sample_id, drop=F]
		samp <- samp[samp$sample_id %in% colnames(counts),,drop=FALSE]
		counts <- counts[, samp$sample_id, drop=FALSE]

		#exclude samples not in comparison
		exclude <- as.vector(samp$sample_id[is.na(samp$condition)])
		if(length(exclude)!=0){
		message("Excluding ", ifelse(length(exclude)<10, paste(exclude, collapse=" "), paste(length(exclude), "cells/samples")), " for this comparison!")
		samp <- samp[!is.na(samp$condition),]
		counts <- counts[, !(colnames(counts) %in% exclude), drop=F]
		counts <- counts[, !(colnames(counts) %in% exclude), drop=FALSE]
		}
		message("\nProceed with cells/samples: ",paste0(utils::capture.output(table(samp$condition)), collapse = "\n"))
		assertthat::assert_that(length(levels(samp$condition))==2, msg="No two sample groups left for comparison. Aborting!")
		@@ -367,7 +371,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		sc={
		smallest_group <- min(table(samp$condition))*0.05
		filter_opt_list <- utils::modifyList(filter_opt_list, list("min_samps_feature_prop" = smallest_group,
		"min_feature_prop" = 0.05, "run_gene_twice" = T))
		"min_feature_prop" = 0.05, "run_gene_twice" = TRUE))
		},
		bulk={
		smallest_group <- min(table(samp$condition))*0.5
		@@ -375,16 +379,16 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		list("min_samps_gene_expr" = smallest_group,
		"min_gene_expr" = 5,
		"min_samps_feature_prop" = smallest_group,
		"min_feature_prop" = 0.05, "run_gene_twice" = T))
		"min_feature_prop" = 0.05, "run_gene_twice" = TRUE))
		},
		own={
		filter_opt_list <- utils::modifyList(filter_opt_list, list(...))
		})
		#force garbage collection before RAM intensive computations.
		x <- gc(verbose=F)
		BiocParallel::bpprogressbar(BPPARAM) <- T
		x <- gc(verbose=FALSE)
		BiocParallel::bpprogressbar(BPPARAM) <- TRUE
		counts <- do.call(sparse_filter, args = c(list("counts" = counts, "tx2gene" = tx2gene, "BPPARAM" = BPPARAM), filter_opt_list), quote=TRUE)
		BiocParallel::bpprogressbar(BPPARAM) <- F
		BiocParallel::bpprogressbar(BPPARAM) <- FALSE

		if(filter_only){
		return(counts)
		@@ -413,11 +417,11 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		tx2gene <- tx2gene[match(rownames(exp_in_tx), tx2gene$feature_id),]
		tx2gene <- tx2gene[,!apply(tx2gene,2,function(x) any(is.na(x)))]
		if(nrow(tx2gene)==nrow(exp_in_tx)&ncol(tx2gene)>2){
		exp_in_tx <- cbind(exp_in_tx, tx2gene[,-c(1,2)], stringsAsFactors = F)
		exp_in_tx <- cbind(exp_in_tx, tx2gene[,-c(1,2)], stringsAsFactors = FALSE)
		add_to_gene_columns <- check_unique_by_partition(tx2gene[,-c(1,2)], drim@counts@partitioning)
		if(!is.null(add_to_gene_columns)){
		tx2gene <- get_by_partition(df = tx2gene, columns =add_to_gene_columns, partitioning = drim@counts@partitioning, FUN=unique, BPPARAM = BPPARAM)
		exp_in_gn <- cbind(exp_in_gn, tx2gene[match(rownames(exp_in_gn), tx2gene[[1]]),-c(1)], stringsAsFactors = F)
		exp_in_gn <- cbind(exp_in_gn, tx2gene[match(rownames(exp_in_gn), tx2gene[[1]]),-c(1)], stringsAsFactors = FALSE)
		}
		}
		}
		@@ -428,7 +432,7 @@ run_drimseq <- function(counts, tx2gene, pd, id_col=NULL, cond_col, cond_levels=
		### do not add uniform distribution to full fit, as it is not added to null fit
		drim_test <- sparseDRIMSeq::dmFit(drim_test, design=design_full, BPPARAM=BPPARAM, add_uniform=add_pseudocount, verbose=1)
		drim_test <- sparseDRIMSeq::dmTest(drim_test, coef=2, BPPARAM=BPPARAM, verbose=1)
		group <- factor(samp$condition, levels = cond_levels, ordered = T)
		group <- factor(samp$condition, levels = cond_levels, ordered = TRUE)

		exp_in_gn <- rapply(exp_in_gn, as.character, classes="factor", how="replace")
		exp_in_tx <- rapply(exp_in_tx, as.character, classes="factor", how="replace")
		@@ -467,7 +471,7 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		assertthat::assert_that(0<=ofdr & ofdr<=1, msg = "The provided 'ofdr' parameter is invalid. Must be a number between [0,1].")

		res <- sparseDRIMSeq::results(dturtle$drim)
		if(posthoc!=F\|\|posthoc>0){
		if(posthoc!=FALSE\|\|posthoc>0){
		res_txp <- run_posthoc(dturtle$drim, posthoc)
		}else{
		res_txp <- sparseDRIMSeq::results(dturtle$drim, level="feature")
		@@ -484,14 +488,14 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		rownames(pconfirm) <- res_txp$feature_id
		tx2gene <- res_txp[,c("feature_id", "gene_id")]

		stageRObj <- stageR::stageRTx(pScreen = pscreen, pConfirmation = pconfirm, pScreenAdjusted = F, tx2gene = tx2gene)
		stageRObj <- stageR::stageRTx(pScreen = pscreen, pConfirmation = pconfirm, pScreenAdjusted = FALSE, tx2gene = tx2gene)
		stageRObj <- stageR::stageWiseAdjustment(stageRObj, method = "dtu", alpha = ofdr)
		fdr_table <- stageR::getAdjustedPValues(stageRObj, order = F, onlySignificantGenes = F)
		fdr_table <- stageR::getAdjustedPValues(stageRObj, order = FALSE, onlySignificantGenes = FALSE)
		fdr_table <- rapply(fdr_table, as.character, classes="factor", how="replace")
		sig_gene <- unique(fdr_table$geneID[fdr_table$gene<ofdr])

		if(length(sig_gene)>0){
		temp <- fdr_table[fdr_table$gene<ofdr&fdr_table$transcript<ofdr,,drop=F]
		temp <- fdr_table[fdr_table$gene<ofdr&fdr_table$transcript<ofdr,,drop=FALSE]
		sig_tx <- stats::setNames(temp$txID, temp$geneID)
		message("Found ",length(sig_gene)," significant genes with ",length(sig_tx)," significant transcripts (OFDR: ",ofdr,")")
		}else{
		@@ -503,7 +507,7 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		return_obj <- append(list("sig_gene" = sig_gene, "sig_tx" = sig_tx,
		"FDR_table" = fdr_table), dturtle)
		return_obj$used_filtering_options$posthoc_stager <- list("ofdr" = ofdr,
		"posthoc"=ifelse(posthoc==F, 0, posthoc))
		"posthoc"=ifelse(posthoc==FALSE, 0, posthoc))
		class(return_obj) <- append("dturtle", class(return_obj))
		return(return_obj)
		}
		@@ -527,7 +531,7 @@ posthoc_and_stager <- function(dturtle, ofdr=0.05, posthoc=0.1){
		#' Thus, DTU effects for transcripts with a low score are less likely to be detectable with the given data.
		#'
		#' @param counts A (sparse) count matrix, where columns represent a sample / cell and rows represent a single transcript isoform. This data is used to infer each gene's reference transcript.
		#' @param gtf A GTF file with gene and exon-level information. Can be a filepath or a previously imported gtf file (as GRanges or data frame). It is advised to read-in the file like this: `gtf <- import_gtf("YOUR_PATH", feature_type = NULL, out_df=F)`.
		#' @param gtf A GTF file with gene and exon-level information. Can be a filepath or a previously imported gtf file (as GRanges or data frame). It is advised to read-in the file like this: `gtf <- import_gtf("YOUR_PATH", feature_type = NULL, out_df=FALSE)`.
		#' @param tx2gene Data frame, where the first column consists of feature identifiers and the second column consists of corresponding gene identifiers. Feature identifiers must match with the rownames of the counts object.
		#' @param priming_enrichment Specify, which end of the mRNA is supposed to be enriched in your (single-cell) RNA-seq protocol. Can be either '3' or '5', for the 3'-end or the 5'-end respectively.
		#' @param genes (Optional) Specify certain genes, that shall be analysed. If `NULL`, defaults to all genes in the provided tx2gene data frame.
		@@ -559,9 +563,9 @@ priming_bias_detection_probability <- function(counts, gtf, tx2gene, one_to_one=
		assertthat::assert_that(methods::is(BPPARAM, "BiocParallelParam"), msg = "Please provide a valid BiocParallelParam object in BPPARAM.")

		if(is.data.frame(gtf)){
		gtf <- GenomicRanges::makeGRangesFromDataFrame(gtf, keep.extra.columns = T)
		gtf <- GenomicRanges::makeGRangesFromDataFrame(gtf, keep.extra.columns = TRUE)
		}else if(is.character(gtf)){
		gtf <- import_gtf(gtf_file = gtf, feature_type = NULL, out_df = F)
		gtf <- import_gtf(gtf_file = gtf, feature_type = NULL, out_df = FALSE)
		}

		assertthat::assert_that(class(gtf) %in% c("GRanges"))
		@@ -603,10 +607,10 @@ priming_bias_detection_probability <- function(counts, gtf, tx2gene, one_to_one=

		#compute mean proportion of each tx to infer the reference tx
		counts <- get_proportion_matrix(counts, tx2gene = tx2gene, genes=valid_genes)
		counts <- Matrix::rowMeans(x=counts, na.rm = T)
		counts <- Matrix::rowMeans(x=counts, na.rm = TRUE)

		if(length(valid_genes)>10){
		BiocParallel::bpprogressbar(BPPARAM) <- T
		BiocParallel::bpprogressbar(BPPARAM) <- TRUE
		}

		message("Scoring transcripts of ", length(valid_genes), " genes.")
		@@ -680,7 +684,7 @@ priming_bias_detection_probability <- function(counts, gtf, tx2gene, one_to_one=
		return(add_to_table)
		}

		return_df <- data.frame("tx"=names(score_list), "detection_probability"=score_list, "used_as_ref"=ref_tx, row.names = NULL, stringsAsFactors = F)
		return_df <- data.frame("tx"=names(score_list), "detection_probability"=score_list, "used_as_ref"=ref_tx, row.names = NULL, stringsAsFactors = FALSE)
		return_df$gene <- tx2gene[[2]][match(return_df$tx, tx2gene[[1]])]
		return_df <- move_columns_to_front(return_df, "gene")

R/sc_support.R

+6 −6

Original line number	Diff line number	Diff line
		@@ -15,8 +15,8 @@ sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(),
		min_samps_feature_prop = 0, min_feature_prop = 0,
		run_gene_twice=FALSE){
		assertthat::assert_that(methods::is(counts, "matrix")\|\|methods::is(counts, "sparseMatrix"))
		counts <- counts[Matrix::rowSums(counts)>0,,drop=F]
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),,drop=F]
		counts <- counts[Matrix::rowSums(counts)>0,,drop=FALSE]
		tx2gene <- tx2gene[match(rownames(counts), tx2gene[[1]]),,drop=FALSE]
		#genes with at least two transcripts
		inds <- which(duplicated(tx2gene[[2]]) \| duplicated(tx2gene[[2]], fromLast = TRUE))
		inds <- split(inds, f = tx2gene[inds,2])
		@@ -45,7 +45,7 @@ sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(),
		return(NULL)

		temp <- expr_features[, samps2keep, drop = FALSE]
		prop <- temp %*% Matrix::diag(1/Matrix::colSums(temp), names = F)
		prop <- temp %*% Matrix::diag(1/Matrix::colSums(temp), names = FALSE)

		### features with min proportion
		row_index <- Matrix::rowSums(prop >= min_feature_prop) >= min_samps_feature_prop
		@@ -74,7 +74,7 @@ sparse_filter <- function(counts, tx2gene, BPPARAM=BiocParallel::SerialParam(),
		}
		counts_new <- BiocParallel::bplapply(inds, FUN=filter, BPPARAM=BPPARAM)
		BiocParallel::bpstop(BPPARAM)
		counts_new <- counts[unlist(counts_new),,drop=F]
		counts_new <- counts[unlist(counts_new),,drop=FALSE]
		assertthat::assert_that(nrow(counts_new)>0, msg = "No Features left after filtering. Maybe try more relaxed filtering parameter.")
		message("Retain ",nrow(counts_new), " of ",nrow(counts)," features.\nRemoved ", nrow(counts)-nrow(counts_new), " features.")
		return(counts_new)
		@@ -101,9 +101,9 @@ readin_bustools <- function(files){
		message("\tReading in Matrix.")
		mtx <- Matrix::readMM(mtx)
		message("\tAssigning dimnames.")
		rownames <- scan(rownames, what = "character", quiet=T)
		rownames <- scan(rownames, what = "character", quiet=TRUE)
		assertthat::assert_that(length(rownames)==nrow(mtx), msg = "Number of barcodes does not match to matrix.")
		colnames <- scan(colnames, what = "character", quiet=T)
		colnames <- scan(colnames, what = "character", quiet=TRUE)
		assertthat::assert_that(length(colnames)==ncol(mtx), msg = "Number of features does not match to matrix.")
		dimnames(mtx) <- list(rownames, colnames)
		return(methods::as(Matrix::t(mtx), "dgCMatrix"))

R/utility_functions.R

+18 −18

File changed.

Preview size limit exceeded, changes collapsed.

R/visualization.R

+71 −71

File changed.

Preview size limit exceeded, changes collapsed.

Admin message